スクレイピングの分野にはまだあまり慣れていませんが、スパイダー用に次のコードを何とか書くことができました。
import os
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'thesentientspider.settings')
from scrapy.selector import HtmlXPathSelector
from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.utils.response import get_base_url
from urlparse import urljoin
from thesentientspider.items import RestaurantDetails, UserReview
import urllib
from scrapy.conf import settings
import pymongo
from pymongo import MongoClient
# MongoDB connection settings, looked up in the Scrapy `settings` object
# imported above under the MONGODB_SERVER and MONGODB_PORT keys.
MongoDBServer=settings['MONGODB_SERVER']
MongoDBPort=settings['MONGODB_PORT']
class ZomatoSpider(BaseSpider):
    """Spider that starts from the zomato.com restaurant listing of each city.

    One start URL is generated per entry in ``CITY``.
    """

    name = 'zomatoSpider'
    allowed_domains = ['zomato.com']

    # City slugs substituted into the listing URL template below.
    CITY = ["hyderabad"]

    start_urls = [
        'http://www.zomato.com/%s/restaurants/' % cityName
        for cityName in CITY
    ]

    def parse(self, response):
        """Prepare an XPath selector and the base URL for ``response``.

        Nothing is extracted or returned yet: both values are only bound
        to local names.
        """
        selector = HtmlXPathSelector(response)
        base_url = get_base_url(response)
ただし、`scrapy crawl zomatoSpider` コマンドで起動しようとすると、次のエラーがスローされます。
Traceback (most recent call last):
File "/usr/bin/scrapy", line 4, in <module>
execute()
File "/usr/lib/pymodules/python2.6/scrapy/cmdline.py", line 131, in execute
_run_print_help(parser, _run_command, cmd, args, opts)
File "/usr/lib/pymodules/python2.6/scrapy/cmdline.py", line 76, in _run_print_help
func(*a, **kw)
File "/usr/lib/pymodules/python2.6/scrapy/cmdline.py", line 138, in _run_command
cmd.run(args, opts)
File "/usr/lib/pymodules/python2.6/scrapy/commands/crawl.py", line 43, in run
spider = self.crawler.spiders.create(spname, **opts.spargs)
File "/usr/lib/pymodules/python2.6/scrapy/command.py", line 33, in crawler
self._crawler.configure()
File "/usr/lib/pymodules/python2.6/scrapy/crawler.py", line 40, in configure
self.spiders = spman_cls.from_crawler(self)
File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 35, in from_crawler
sm = cls.from_settings(crawler.settings)
File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 31, in from_settings
return cls(settings.getlist('SPIDER_MODULES'))
File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 23, in __init__
self._load_spiders(module)
File "/usr/lib/pymodules/python2.6/scrapy/spidermanager.py", line 26, in _load_spiders
for spcls in iter_spider_classes(module):
File "/usr/lib/pymodules/python2.6/scrapy/utils/spider.py", line 21, in iter_spider_classes
issubclass(obj, BaseSpider) and \
TypeError: issubclass() arg 1 must be a class
どなたか根本的な原因を指摘し、コード スニペットで修正方法を提案していただけますか?