This article presents a Python example of running Scrapy in a thread, shared for your reference. The details are as follows:
If you want to invoke Scrapy from within your own program, you can run Scrapy in a dedicated thread with the following code.
""" Code to run Scrapy crawler in a thread - works on Scrapy 0.8 """ import threading, Queue from import reactor from import dispatcher from import scrapymanager from import scrapyengine from import signals class CrawlerThread(): def __init__(self): .__init__(self) = False def run(self): = True (control_reactor=False) () (installSignalHandlers=False) def crawl(self, *args): if not : raise RuntimeError("CrawlerThread not running") self._call_and_block_until_signal(signals.spider_closed, \ , *args) def stop(self): () def _call_and_block_until_signal(self, signal, f, *a, **kw): q = () def unblock(): (None) (unblock, signal=signal) (f, *a, **kw) () # Usage example below: import os ('SCRAPY_SETTINGS_MODULE', '') from import dispatcher from import signals from import settings from import CrawlerThread ['LOG_ENABLED'] = False # avoid log noise def item_passed(item): print "Just scraped item:", item (item_passed, signal=signals.item_passed) crawler = CrawlerThread() print "Starting crawler thread..." () print "Crawling ...." (') # blocking call print "Crawling ..." ('') # blocking call print "Stopping crawler thread..." ()
I hope what I have described in this article helps you in your Python programming.