cURL を使用して、プロキシ経由で XML ページに接続しようとしています。しかし、何らかの理由でページに接続できません。XML の解析部分には問題がないため、その部分のコードは省略しています。
from bs4 import BeautifulSoup
import time #added for curl
import subprocess #added for curl
import os #added for curl
# Fetch one XML page per ID listed in list.txt by running curl through an
# NTLM proxy, then write selected fields from each page to a report file.
file_name = raw_input("Type the name of the new file you will create: ")

# Raw string so no backslash in the Windows path can be read as an escape.
curlURL = r'F:\Downloads\curl-7.31.0-rtmp-ssh2-ssl-sspi-zlib-idn-static-bin-w32\curl.exe'
# curl writes each response here; the file is deleted after every page.
temp_path = 'c:\\temp.txt'

with open(file_name + ".txt", 'w') as g:
    g.write("---XML Parse---\n")
    with open("list.txt") as f:  # file from which page IDs are read
        for line in f:
            page_id = line.rstrip('\r\n')
            if not page_id:
                # A blank line would only request the site root; skip it.
                continue
            g.write("\nPage ID: " + page_id + "\n")

            # The page ID must be concatenated OUTSIDE the string literal;
            # the original sent the literal text "+line.rstrip(...)" (with an
            # embedded newline) to curl instead of the real ID.
            link = "https://somewebsite.com/" + page_id

            # Pass the arguments as a list so that subprocess quotes each one
            # itself. "-U :" gives curl an empty user:password pair.
            # NOTE(review): on SSPI builds of curl this is documented to use
            # the logged-in Windows credentials; an HTTP 407 means the proxy
            # still rejected the authentication -- confirm the proxy accepts
            # NTLM for this account.
            args = [
                curlURL,
                '-L', link,
                '-o', temp_path,
                '--proxy-ntlm',
                '-x', 'http://myproxy:80',  # using a proxy
                '-k',
                '-U', ':',
            ]
            print(args)

            # Run curl and wait for it to finish before proceeding.
            return_code = subprocess.call(args)

            xml_string = None
            try:
                # Only read the temp file when curl succeeded and actually
                # produced output; otherwise open() would raise.
                if return_code == 0 and os.path.exists(temp_path):
                    with open(temp_path, 'r') as temp_file:
                        xml_string = temp_file.read()
                    # NOTE(review): pause kept from the original; its purpose
                    # is not evident here (possibly throttling) -- confirm
                    # before removing.
                    time.sleep(3)
            finally:
                # Clean up so a stale response is never parsed for a later ID.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

            if xml_string is None:
                print("curl failed for page %s (exit code %d)"
                      % (page_id, return_code))
                continue

            soup = BeautifulSoup(xml_string)
            result = soup.find('bibliographic-data')
            if result is not None:
                status = result['status']
                g.write("\nApplication Status: " + status + "\n")
                g.write("Most Recent Event Information: \n")
                #...i go on to parse the document
エラーが発生しています:
curl: (56) Received HTTP code 407 from proxy after CONNECT
アクセスが拒否されている理由は何ですか?