2

私はスクレイピングの分野にまだあまり慣れていませんが、スパイダー用に次のコードをなんとか書くことができました。

import os
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'thesentientspider.settings')
from scrapy.selector import HtmlXPathSelector
from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.utils.response import get_base_url
from urlparse import urljoin
from thesentientspider.items import RestaurantDetails, UserReview
import urllib
from scrapy.conf import settings
import pymongo
from pymongo import MongoClient

# MongoDB connection settings, looked up from the Scrapy project settings
# object under the 'MONGODB_SERVER' and 'MONGODB_PORT' keys.
MongoDBServer=settings['MONGODB_SERVER']
MongoDBPort=settings['MONGODB_PORT']

class ZomatoSpider(BaseSpider):
    """Spider seeded with one Zomato restaurant-listing URL per entry in ``CITY``."""

    # Spider name; this is the identifier given to ``scrapy crawl``.
    name = 'zomatoSpider'
    allowed_domains = ['zomato.com']
    # City names substituted into the listing URL template below.
    CITY=["hyderabad"]
    start_urls = [
        'http://www.zomato.com/%s/restaurants/'  %cityName for cityName in CITY
        ]

    def parse(self, response):
        """Handle a downloaded response.

        The visible snippet only builds an XPath selector over ``response``
        and looks up the response's base URL; nothing is extracted or
        returned in the code shown here.
        """
        hxs = HtmlXPathSelector(response)
        BASE_URL=get_base_url(response)

しかし、`scrapy crawl zomatoSpider` コマンドを使って起動しようとすると、次のエラーがスローされます。

Traceback (most recent call last):
  File "/usr/bin/scrapy", line 4, in <module>
    execute()
  File "/usr/lib/pymodules/python2.6/scrapy/cmdline.py", line 131, in execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "/usr/lib/pymodules/python2.6/scrapy/cmdline.py", line 76, in _run_print_help
    func(*a, **kw)
  File "/usr/lib/pymodules/python2.6/scrapy/cmdline.py", line 138, in _run_command
    cmd.run(args, opts)
  File "/usr/lib/pymodules/python2.6/scrapy/commands/crawl.py", line 43, in run
    spider = self.crawler.spiders.create(spname, **opts.spargs)
  File "/usr/lib/pymodules/python2.6/scrapy/command.py", line 33, in crawler
    self._crawler.configure()
  File "/usr/lib/pymodules/python2.6/scrapy/crawler.py", line 40, in configure
    self.spiders = spman_cls.from_crawler(self)
  File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 35, in from_crawler
    sm = cls.from_settings(crawler.settings)
  File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 31, in from_settings
    return cls(settings.getlist('SPIDER_MODULES'))
  File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 23, in __init__
    self._load_spiders(module)
  File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 26, in _load_spiders
    for spcls in iter_spider_classes(module):
  File "/usr/lib/pymodules/python2.6/scrapy/utils/spider.py", line 21, in iter_spider_classes
    issubclass(obj, BaseSpider) and \
TypeError: issubclass() arg 1 must be a class

どなたか根本的な原因を指摘し、コードスニペットで修正方法を提案していただけますか?

4

1 に答える 1