[settings]
default = myproject.settings
[deploy:local]
url = http://localhost:6800/
username = admin
password = 123456
project = myproject
from scrapy.item import Item, Field
from scrapy.loader.processors import TakeFirst, MapCompose, Join, Compose
def validate_price(value):
    """Return *value* unchanged if it consists solely of decimal digits.

    Args:
        value: The raw price, normally a ``str``. ``bytes``/``bytearray``
            values are accepted as well and checked for ASCII digits.

    Returns:
        The same object that was passed in, untouched.

    Raises:
        ValueError: If *value* is empty, contains any non-digit character
            (sign, decimal point, whitespace, ...) or is not text at all.
    """
    if isinstance(value, str):
        # isdecimal() rather than isdigit(): isdigit() also accepts
        # characters such as superscript "²" that int() cannot parse.
        is_valid = value.isdecimal()
    elif isinstance(value, (bytes, bytearray)):
        # bytes.isdigit() only matches ASCII 0-9, so it is already strict.
        is_valid = value.isdigit()
    else:
        # None, numbers, etc. are reported as an invalid price instead of
        # surfacing as an unrelated AttributeError.
        is_valid = False

    if not is_valid:
        raise ValueError("Invalid price")
    return value
class ProductItem(Item):
    """Item describing one product: its name, price and description.

    The processors attached to each field are metadata; they take effect
    when the item is populated through an item loader.
    """

    # Output: TakeFirst() collapses the collected values to a single one.
    name = Field(output_processor=TakeFirst())

    # Input: every collected value is passed through validate_price, which
    # raises ValueError for anything that is not a digit-only string.
    # Output: TakeFirst() collapses the validated values to a single one.
    price = Field(
        input_processor=MapCompose(validate_price),
        output_processor=TakeFirst(),
    )

    # Output: Join() concatenates the collected fragments into one string.
    description = Field(output_processor=Join())
class ProxyMiddleware:
    """Downloader middleware that routes each request through a pooled proxy.

    For every outgoing request the middleware asks the proxy-pool service at
    ``proxy_pool_url`` for a proxy address and stores the response body in
    ``request.meta['proxy']``. Obtaining a proxy is best-effort: if the pool
    is unreachable, times out, answers with a non-200 status or returns an
    empty body, the request is left untouched.
    """

    def __init__(self, proxy_pool_url, timeout=5.0):
        """Store the pool endpoint and the lookup timeout.

        Args:
            proxy_pool_url: URL of the service that returns a proxy address
                as its response body.
            timeout: Seconds to wait for the pool service before giving up
                on the lookup.
        """
        self.proxy_pool_url = proxy_pool_url
        self.timeout = timeout

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from crawler settings.

        Reads ``PROXY_POOL_URL`` and the optional ``PROXY_POOL_TIMEOUT``
        (seconds, default 5.0).
        """
        return cls(
            proxy_pool_url=crawler.settings.get('PROXY_POOL_URL'),
            timeout=crawler.settings.getfloat('PROXY_POOL_TIMEOUT', 5.0),
        )

    def process_request(self, request, spider):
        """Attach a proxy from the pool to *request*, if one is available.

        Always returns ``None`` so the request continues through the
        remaining middlewares.
        """
        # NOTE(review): requests.get is a synchronous call, so the crawler
        # waits on it for every request; the timeout bounds that wait.
        try:
            response = requests.get(self.proxy_pool_url, timeout=self.timeout)
        except (requests.ConnectionError, requests.Timeout) as exc:
            # Transient pool failures are handled like a non-200 answer:
            # carry on without a proxy. Configuration errors (e.g. an
            # invalid URL) are deliberately not caught here.
            spider.logger.warning(
                "Could not fetch a proxy from %s: %s", self.proxy_pool_url, exc
            )
            return None

        if response.status_code != 200:
            return None

        # Drop surrounding whitespace such as a trailing newline, and never
        # install an empty proxy value.
        proxy = response.text.strip()
        if proxy:
            request.meta['proxy'] = proxy
        return None