How to run Scrapy from a script in Python

Scrapy is normally launched from the command line with scrapy crawl <spider>, but it can also be driven programmatically. The script below (written against the old Scrapy 0.x / Python 2 API) wraps a crawl in a small helper class: it collects scraped items through the item_passed signal and runs each crawl in a child process, so several crawls can be performed from a single script.

#!/usr/bin/python
import os
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'project.settings')  # your project's settings module; must be set before the other Scrapy imports

from scrapy import log, signals, project
from scrapy.xlib.pydispatch import dispatcher
from scrapy.conf import settings
from scrapy.crawler import CrawlerProcess
from multiprocessing import Process, Queue

class CrawlerScript():
    def __init__(self):
        self.crawler = CrawlerProcess(settings)
        if not hasattr(project, 'crawler'):
            self.crawler.install()
        self.crawler.configure()
        self.items = []
        # collect every item the spiders emit
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        self.items.append(item)

    def _crawl(self, queue, spider_name):
        spider = self.crawler.spiders.create(spider_name)
        if spider:
            self.crawler.queue.append_spider(spider)
        self.crawler.start()
        self.crawler.stop()
        queue.put(self.items)

    def crawl(self, spider):
        # run the crawl in a child process: Twisted's reactor
        # cannot be restarted within a single process
        queue = Queue()
        p = Process(target=self._crawl, args=(queue, spider,))
        p.start()
        p.join()
        return queue.get(True)
# Usage
if __name__ == "__main__":
    log.start()
    # This example runs spider1 once, then spider2 three times.
    items = list()
    crawler = CrawlerScript()
    items.append(crawler.crawl('spider1'))
    for i in range(3):
        items.append(crawler.crawl('spider2'))
    print items  # Python 2 print statement; this Scrapy API predates Python 3 support
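
Note that the script above targets the long-removed Scrapy 0.x API: scrapy.conf, scrapy.project and scrapy.xlib.pydispatch no longer exist in current releases, and the item_passed signal was renamed item_scraped. On modern Scrapy (1.x and later), the supported way to run a spider from a script is scrapy.crawler.CrawlerProcess, which manages the Twisted reactor for you. The sketch below is a minimal, hedged equivalent, not the article's original code; the spider name 'spider1' and a standard project layout (so that get_project_settings() can locate your settings) are assumptions carried over from the example above.

from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

items = []

def _item_scraped(item, response, spider):
    # item_passed from the old API is item_scraped in current Scrapy
    items.append(item)

process = CrawlerProcess(get_project_settings())
crawler = process.create_crawler('spider1')  # assumed spider name, as above
crawler.signals.connect(_item_scraped, signal=signals.item_scraped)
process.crawl(crawler)
process.start()  # blocks until the crawl is finished

print(items)

Because CrawlerProcess starts and stops the reactor itself, the multiprocessing workaround from the original script is no longer needed; the remaining limitation is that process.start() can only be called once per process, so queue all spiders with process.crawl() before starting.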