
I used this script:

from twisted.internet import reactor, threads
from urlparse import urlparse
import httplib
import itertools


concurrent = 200
finished=itertools.count(1)
reactor.suggestThreadPoolSize(concurrent)

def getStatus(ourl):
    url = urlparse(ourl)
    conn = httplib.HTTPConnection(url.netloc)   
    conn.request("HEAD", url.path)
    res = conn.getresponse()
    return res.status

def processResponse(response,url):
    print response, url
    processedOne()

def processError(error,url):
    print "error", url#, error
    processedOne()

def processedOne():
    if finished.next()==added:
        reactor.stop()

def addTask(url):
    req = threads.deferToThread(getStatus, url)
    req.addCallback(processResponse, url)
    req.addErrback(processError, url)   

added=0
for url in open('urllist.txt'):
    added+=1
    addTask(url.strip())

try:
    reactor.run()
except KeyboardInterrupt:
    reactor.stop()

When I try to run the script with $ python test.py,

it only prints out the URLs and never actually runs cURL or sends any HTTP requests.

How can I get it to issue the HTTP or cURL request for each URL?

Thanks


2 Answers


Code tested using inlineCallbacks and deferToThread. It also uses defer.gatherResults to know when all of the deferreds have finished (instead of the OP's counter method):

from twisted.internet import defer
from twisted.internet.threads import deferToThread
from urlparse import urlparse
import httplib

# deferToThread.__get__(f) returns a bound method that calls
# deferToThread(f, *args), i.e. a decorator that runs f in the thread pool
threadDeferred = deferToThread.__get__

@threadDeferred
def get_url_head(url_arg):
  url = urlparse(url_arg)
  conn = httplib.HTTPConnection(url.netloc)   
  conn.request("HEAD", url.path)
  res = conn.getresponse()
  conn.close()
  return res.status

@defer.inlineCallbacks
def check_url(sem,url_arg):
  yield sem.acquire()
  try:
    result = yield get_url_head(url_arg)
    defer.returnValue(result)
  finally:
    sem.release()

@defer.inlineCallbacks
def run(reactor,SEMAPHORE_SIZE=10):
  sem = defer.DeferredSemaphore(SEMAPHORE_SIZE)
  deferreds = []
  failed_urls = []
  responded_urls = []
  with open('urllist.txt','r') as f:
    for line in f:
      url_arg = line.strip()
      d = check_url(sem,url_arg)
      d.addCallback(processResult,url_arg,responded_urls)
      d.addErrback(processErr,url_arg,failed_urls)
      deferreds.append(d)
  res = yield defer.gatherResults(deferreds)
  # Do something else with failed_urls and responded_urls
  reactor.callLater(0,reactor.stop)

def main():
  from twisted.internet import reactor
  reactor.callWhenRunning(run,reactor)
  reactor.run()

def processResult(result,url_arg,responded_urls):
  print "Reponse %s from %s" % (result,url_arg)
  responded_urls.append((url_arg,result))

def processErr(err,url_arg,failed_urls):
  print "Error checking %s: %s" % (url_arg,repr(err.value))
  failed_urls.append((url_arg,err.value))

if __name__ == '__main__':
  main()
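
As a side note (not part of the original answer): DeferredSemaphore.run pairs the acquire and release for you, so check_url above could be collapsed into a sketch like this:

def check_url(sem, url_arg):
  # sem.run acquires the semaphore, calls get_url_head(url_arg), and
  # releases the semaphore once the returned Deferred fires
  return sem.run(get_url_head, url_arg)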
Answered 2013-08-27T20:10:42.007

This should work if your URLs do not contain "http://"; if they do contain "http://", the fix is in the comment:

import httplib

def requester(url):
    host = url.split('/')[0]
    #if urls do contain 'http://' -->  host = url.split('/')[2].replace('http://','')
    req = url[url.find(host)+len(host):]
    conn = httplib.HTTPConnection(host)
    conn.request("HEAD","/"+req)
    response = conn.getresponse()
    print response.status, response.reason

    #if you want data...
    #data = response.read()
    #print data

for url in open('urls.txt'):
    try:
        requester(url.strip())
    except Exception, e:
        print "error:", e

Also, I'd recommend checking out httplib.
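
One hedged extra, not in the original answer: HEAD requests often come back as a 301/302 redirect, so the raw status alone can be misleading. Here is a minimal sketch that follows redirects using the same httplib calls (the head_status helper is hypothetical):

import httplib

def head_status(host, path, max_redirects=3):
    # Follow up to max_redirects redirects and return the final status code
    for _ in range(max_redirects):
        conn = httplib.HTTPConnection(host)
        conn.request("HEAD", path)
        response = conn.getresponse()
        conn.close()
        if response.status not in (301, 302):
            return response.status
        location = response.getheader('Location')
        if not location:
            return response.status
        # naive re-parse; assumes an absolute http://host/path redirect target
        host = location.split('/')[2]
        path = '/' + '/'.join(location.split('/')[3:])
    return response.status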

Answered 2013-08-27T20:41:56.417