python - Scrapy follow link and collect email -


I need help saving emails with Scrapy. The row in the .csv file where the emails are supposed to be collected is blank. Any help is appreciated. Here is my code:

# -*- coding: utf-8 -*-
import scrapy


# The item class is included here so the example is self-contained.
class dmozitem(scrapy.Item):
    """Scraped record: the listing URL and the contact text read from it."""

    # Define the fields for the item here:
    link = scrapy.Field()
    attr = scrapy.Field()


class dmozspider(scrapy.Spider):
    """Follow every listing linked from the search page and emit one item each.

    NOTE: this is the version from the question. It reads the ``anonemail``
    block straight from the listing page; the question reports that the
    resulting ``attr`` column comes out blank.
    """

    name = "dmoz"
    allowed_domains = ["craigslist.org"]
    start_urls = [
        "http://chicago.craigslist.org/search/vgm?"
    ]

    base_url = 'http://chicago.craigslist.org/'

    def parse(self, response):
        """Yield one request per listing link found on the search page."""
        links = response.xpath('//a[@class="hdrlnk"]/@href').extract()
        for link in links:
            # urljoin resolves relative and absolute hrefs alike and avoids
            # the double slash that ``base_url + link`` produces when the
            # href already starts with "/".
            absolute_url = response.urljoin(link)
            yield scrapy.Request(absolute_url, callback=self.parse_attr)

    def parse_attr(self, response):
        """Build the item from the listing page and return it."""
        item = dmozitem()
        item["link"] = response.url
        # Concatenate every text node inside the anonemail block; this is an
        # empty string when the block is absent from the page.
        item["attr"] = "".join(
            response.xpath("//div[@class='anonemail']//text()").extract()
        )
        return item

In order to see the email on a Craigslist item page, one has to click the "Reply" button, which initiates a new request to a "reply/chi/vgm/" URL. You need to simulate this in Scrapy by issuing a new request and parsing the results in a callback:

# -*- coding: utf-8 -*-
import re

import scrapy


# The item class is included here so the example is self-contained.
class dmozitem(scrapy.Item):
    """Scraped record: the listing URL and the contact text for it."""

    # Define the fields for the item here:
    link = scrapy.Field()
    attr = scrapy.Field()


class dmozspider(scrapy.Spider):
    """Collect contact text for each listing by requesting its reply page.

    Flow: search page -> listing page -> "reply/chi/vgm/<id>" page. The
    partially filled item travels with the last request in ``meta``.
    """

    name = "dmoz"
    allowed_domains = ["craigslist.org"]
    start_urls = [
        "http://chicago.craigslist.org/search/vgm?"
    ]

    base_url = 'http://chicago.craigslist.org/'

    def parse(self, response):
        """Yield one request per listing link found on the search page."""
        links = response.xpath('//a[@class="hdrlnk"]/@href').extract()
        for link in links:
            # urljoin resolves relative and absolute hrefs alike and avoids
            # the double slash that ``base_url + link`` produces when the
            # href already starts with "/".
            absolute_url = response.urljoin(link)
            yield scrapy.Request(absolute_url, callback=self.parse_attr)

    def parse_attr(self, response):
        """Request the reply page for this listing, carrying the item along.

        Returns None (nothing is scheduled) when the listing id cannot be
        extracted from the URL.
        """
        # The listing id is the word right before ".html" in the URL.
        match = re.search(r"(\w+)\.html", response.url)
        if not match:
            self.logger.warning("No listing id found in %s", response.url)
            return None

        item_id = match.group(1)
        url = self.base_url + "reply/chi/vgm/" + item_id

        item = dmozitem()
        item["link"] = response.url

        # Hand the half-built item to the next callback through meta.
        return scrapy.Request(url, meta={'item': item}, callback=self.parse_contact)

    def parse_contact(self, response):
        """Fill in ``attr`` from the reply page and return the finished item."""
        item = response.meta['item']
        # Concatenate every text node inside the anonemail block; this is an
        # empty string when the block is absent from the page.
        item["attr"] = "".join(
            response.xpath("//div[@class='anonemail']//text()").extract()
        )
        return item

Comments

Popular posts from this blog

IF statement in MySQL trigger -

c++ - What does MSC in "// appease MSC" comments mean? -

javascript - Blogger related post gadget image Resize s72-c [ Need Expert Help ] -