# --- scrapy.cfg --------------------------------------------------------------
# The INI fragment below is configuration, not Python. It looks like the
# contents of a Scrapy ``scrapy.cfg`` file and is preserved here verbatim as a
# comment so that the rest of this module parses.
#
# [settings]
# default = myproject.settings
#
# [deploy:local]
# url = http://localhost:6800/
# username = admin
# password = 123456
# project = myproject
# -----------------------------------------------------------------------------

import logging

import requests
from scrapy.exceptions import NotConfigured
from scrapy.item import Item, Field

try:
    # Preferred import path: the processors are provided by ``itemloaders``.
    from itemloaders.processors import TakeFirst, MapCompose, Join, Compose
except ImportError:
    # Legacy path, kept as a fallback for installations without ``itemloaders``.
    from scrapy.loader.processors import TakeFirst, MapCompose, Join, Compose

logger = logging.getLogger(__name__)


def validate_price(value):
    """Return *value* unchanged if it consists only of digits.

    Args:
        value: A string holding the scraped price.

    Returns:
        The same string that was passed in.

    Raises:
        ValueError: If ``value.isdigit()`` is false (for example an empty
            string, or text containing a sign, a decimal point or spaces).
    """
    if not value.isdigit():
        raise ValueError("Invalid price")
    return value


class ProductItem(Item):
    """Scraped product with per-field processors declared as Field metadata."""

    # First collected value only.
    name = Field(output_processor=TakeFirst())
    # Every collected value is passed through validate_price; first one is kept.
    price = Field(
        input_processor=MapCompose(validate_price),
        output_processor=TakeFirst(),
    )
    # Collected values are joined into a single string.
    description = Field(output_processor=Join())


class ProxyMiddleware:
    """Downloader middleware that assigns each request a proxy from a pool.

    The proxy is obtained with an HTTP GET against ``proxy_pool_url``; the
    response body is used as the value of ``request.meta['proxy']``.
    Fetching is best-effort: if the pool cannot be reached, answers with a
    non-200 status, or returns an empty body, the request is left untouched.
    """

    def __init__(self, proxy_pool_url, timeout=5.0):
        """Store the pool endpoint and the timeout used when querying it.

        Args:
            proxy_pool_url: URL that returns one proxy address per GET.
            timeout: Seconds to wait for the pool before giving up.
        """
        self.proxy_pool_url = proxy_pool_url
        self.timeout = timeout

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the ``PROXY_POOL_URL`` crawler setting.

        Raises:
            NotConfigured: If ``PROXY_POOL_URL`` is unset or empty, so the
                middleware is disabled instead of failing on every request.
        """
        pool_url = crawler.settings.get('PROXY_POOL_URL')
        if not pool_url:
            raise NotConfigured("PROXY_POOL_URL is not set")
        return cls(proxy_pool_url=pool_url)

    def process_request(self, request, spider):
        """Attach a proxy from the pool to *request*, when one is available.

        Always returns ``None``; the only effect is possibly setting
        ``request.meta['proxy']``.
        """
        try:
            # requests.get is synchronous; the timeout keeps a stalled pool
            # from blocking this call indefinitely.
            response = requests.get(self.proxy_pool_url, timeout=self.timeout)
        except requests.RequestException as exc:
            # Same best-effort outcome as a non-200 reply: go on without a proxy.
            logger.warning(
                "Proxy pool request to %s failed: %s", self.proxy_pool_url, exc
            )
            return None

        if response.status_code != 200:
            return None

        # Strip surrounding whitespace (e.g. a trailing newline) so the value
        # is a clean proxy address, and ignore an empty reply.
        proxy = response.text.strip()
        if proxy:
            request.meta['proxy'] = proxy
        return None


上一篇:
下一篇:
切换中文