Output of the new DB entries

hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions

@@ -0,0 +1,10 @@
import scrapy


class $classname(scrapy.Spider):
    name = '$name'
    allowed_domains = ['$domain']
    start_urls = ['http://$domain/']

    def parse(self, response):
        pass
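
The files in this diff are the stock Scrapy genspider templates; $classname, $name, and $domain are string.Template placeholders that scrapy genspider substitutes. A minimal sketch of that substitution, assuming illustrative values (ExampleSpider / example / example.com are not from this commit):

from string import Template

# The basic template above, embedded verbatim for illustration.
BASIC_TMPL = '''import scrapy


class $classname(scrapy.Spider):
    name = '$name'
    allowed_domains = ['$domain']
    start_urls = ['http://$domain/']

    def parse(self, response):
        pass
'''

print(Template(BASIC_TMPL).substitute(
    classname='ExampleSpider',  # assumed example value
    name='example',             # assumed example value
    domain='example.com',       # assumed example value
))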

@@ -0,0 +1,20 @@
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class $classname(CrawlSpider):
    name = '$name'
    allowed_domains = ['$domain']
    start_urls = ['http://$domain/']

    rules = (
        Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        item = {}
        #item['domain_id'] = response.xpath('//input[@id="sid"]/@value').get()
        #item['name'] = response.xpath('//div[@id="name"]').get()
        #item['description'] = response.xpath('//div[@id="description"]').get()
        return item
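
For reference, a rendered version of the crawl template with one of the commented extraction lines enabled; the spider name and domain are assumed example values:

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class ExampleCrawlSpider(CrawlSpider):
    name = 'example_crawl'             # assumed; stands in for $name
    allowed_domains = ['example.com']  # assumed; stands in for $domain
    start_urls = ['http://example.com/']

    # Follow every link whose URL matches r'Items/', hand each fetched
    # page to parse_item, and (follow=True) keep crawling from there.
    rules = (
        Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        item = {}
        item['name'] = response.xpath('//div[@id="name"]').get()
        return item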

@@ -0,0 +1,20 @@
from scrapy.spiders import CSVFeedSpider


class $classname(CSVFeedSpider):
    name = '$name'
    allowed_domains = ['$domain']
    start_urls = ['http://$domain/feed.csv']
    # headers = ['id', 'name', 'description', 'image_link']
    # delimiter = '\t'

    # Do any adaptations you need here
    #def adapt_response(self, response):
    #    return response

    def parse_row(self, response, row):
        i = {}
        #i['url'] = row['url']
        #i['name'] = row['name']
        #i['description'] = row['description']
        return i
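
A rendered csvfeed variant with the headers and delimiter knobs uncommented; all concrete values are assumptions. parse_row is called once per CSV row, with row as a dict keyed by the configured headers:

from scrapy.spiders import CSVFeedSpider


class ProductsSpider(CSVFeedSpider):
    name = 'products'                  # assumed; stands in for $name
    allowed_domains = ['example.com']  # assumed; stands in for $domain
    start_urls = ['http://example.com/feed.csv']
    headers = ['id', 'name', 'description', 'image_link']
    delimiter = ','

    def parse_row(self, response, row):
        # row is a dict keyed by the headers above.
        return {'name': row['name'], 'description': row['description']}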

@@ -0,0 +1,16 @@
from scrapy.spiders import XMLFeedSpider


class $classname(XMLFeedSpider):
    name = '$name'
    allowed_domains = ['$domain']
    start_urls = ['http://$domain/feed.xml']
    iterator = 'iternodes'  # you can change this; see the docs
    itertag = 'item'  # change it accordingly

    def parse_node(self, response, selector):
        item = {}
        #item['url'] = selector.select('url').get()
        #item['name'] = selector.select('name').get()
        #item['description'] = selector.select('description').get()
        return item
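
A rendered xmlfeed variant, again with assumed values. Note that the commented selector.select(...) calls are the legacy Selector API; on current Scrapy the equivalent is selector.xpath(...):

from scrapy.spiders import XMLFeedSpider


class FeedSpider(XMLFeedSpider):
    name = 'feed'                      # assumed; stands in for $name
    allowed_domains = ['example.com']  # assumed; stands in for $domain
    start_urls = ['http://example.com/feed.xml']
    iterator = 'iternodes'  # streaming node iterator; low memory footprint
    itertag = 'item'        # parse_node fires once per <item> element

    def parse_node(self, response, selector):
        # selector is scoped to a single <item> node.
        item = {}
        item['url'] = selector.xpath('url/text()').get()
        item['name'] = selector.xpath('name/text()').get()
        return item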