20 lines
633 B
Cheetah
20 lines
633 B
Cheetah
import scrapy
|
|
from scrapy.linkextractors import LinkExtractor
|
|
from scrapy.spiders import CrawlSpider, Rule
|
|
|
|
|
|
class $classname(CrawlSpider):
|
|
name = '$name'
|
|
allowed_domains = ['$domain']
|
|
start_urls = ['http://$domain/']
|
|
|
|
rules = (
|
|
Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
|
|
)
|
|
|
|
def parse_item(self, response):
|
|
item = {}
|
|
#item['domain_id'] = response.xpath('//input[@id="sid"]/@value').get()
|
|
#item['name'] = response.xpath('//div[@id="name"]').get()
|
|
#item['description'] = response.xpath('//div[@id="description"]').get()
|
|
return item
|