Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
|
|
@ -0,0 +1,10 @@
|
|||
# Scrapy "basic" spider template (rendered by `scrapy genspider`).
# $classname, $name and $domain are string.Template placeholders that are
# substituted at generation time — this file is not valid Python until rendered.
import scrapy


class $classname(scrapy.Spider):
    # Unique spider name, used by `scrapy crawl <name>`.
    name = '$name'
    # Off-site requests (hosts outside these domains) are filtered out.
    allowed_domains = ['$domain']
    # URLs fetched when the spider starts.
    start_urls = ['http://$domain/']

    def parse(self, response):
        # Default callback for responses from start_urls; the generated
        # spider is expected to replace this stub with extraction logic.
        pass
|
|
@ -0,0 +1,20 @@
|
|||
# Scrapy "crawl" spider template (rendered by `scrapy genspider -t crawl`).
# $classname, $name and $domain are string.Template placeholders that are
# substituted at generation time — this file is not valid Python until rendered.
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class $classname(CrawlSpider):
    # Unique spider name, used by `scrapy crawl <name>`.
    name = '$name'
    # Off-site requests (hosts outside these domains) are filtered out.
    allowed_domains = ['$domain']
    # URLs fetched when the spider starts.
    start_urls = ['http://$domain/']

    # Link-following rules: extract links matching r'Items/', pass each
    # matching page to parse_item, and keep following links from them.
    # The example pattern is meant to be edited in the generated spider.
    rules = (
        Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        # Callback for pages matched by the rule above; the commented
        # lines are example field extractions to be adapted.
        item = {}
        #item['domain_id'] = response.xpath('//input[@id="sid"]/@value').get()
        #item['name'] = response.xpath('//div[@id="name"]').get()
        #item['description'] = response.xpath('//div[@id="description"]').get()
        return item
|
|
@ -0,0 +1,20 @@
|
|||
# Scrapy "csvfeed" spider template (rendered by `scrapy genspider -t csvfeed`).
# $classname, $name and $domain are string.Template placeholders that are
# substituted at generation time — this file is not valid Python until rendered.
from scrapy.spiders import CSVFeedSpider


class $classname(CSVFeedSpider):
    # Unique spider name, used by `scrapy crawl <name>`.
    name = '$name'
    # Off-site requests (hosts outside these domains) are filtered out.
    allowed_domains = ['$domain']
    # CSV feed to fetch when the spider starts.
    start_urls = ['http://$domain/feed.csv']
    # Optional overrides: column names and field delimiter for the feed.
    # headers = ['id', 'name', 'description', 'image_link']
    # delimiter = '\t'

    # Do any adaptations you need here
    #def adapt_response(self, response):
    #    return response

    def parse_row(self, response, row):
        # Called once per CSV row; the commented lines are example field
        # mappings from the row dict, to be adapted in the generated spider.
        i = {}
        #i['url'] = row['url']
        #i['name'] = row['name']
        #i['description'] = row['description']
        return i
|
|
@ -0,0 +1,16 @@
|
|||
# Scrapy "xmlfeed" spider template (rendered by `scrapy genspider -t xmlfeed`).
# $classname, $name and $domain are string.Template placeholders that are
# substituted at generation time — this file is not valid Python until rendered.
from scrapy.spiders import XMLFeedSpider


class $classname(XMLFeedSpider):
    # Unique spider name, used by `scrapy crawl <name>`.
    name = '$name'
    # Off-site requests (hosts outside these domains) are filtered out.
    allowed_domains = ['$domain']
    # XML feed to fetch when the spider starts.
    start_urls = ['http://$domain/feed.xml']
    iterator = 'iternodes'  # you can change this; see the docs
    itertag = 'item'  # change it accordingly

    def parse_node(self, response, selector):
        # Called once per <itertag> node; the commented lines are example
        # field extractions from the node selector, to be adapted.
        item = {}
        #item['url'] = selector.select('url').get()
        #item['name'] = selector.select('name').get()
        #item['description'] = selector.select('description').get()
        return item
Loading…
Add table
Add a link
Reference in a new issue