Googleの特許一覧からデータをダウンロードして解析する、既存のPythonモジュールを使っています。2005年以降のデータでは問題なく動作しますが、2005年より前のデータを処理しようとするとエラーになります。私はこのモジュールの実行方法を知っているだけで、Pythonの知識はありません。どうすれば修正できますか?
私が受け取るトレースバックは次のとおりです。
Traceback (most recent call last):
File "C:\Users\John\Desktop\FINAL BART ALL INFO-Magic Bullet.py", line 46, in <module>
assert xml_file is not None
AssertionError
そして、これは私が使用しているコードです:
#Ignore all this information
import urllib2, os, zipfile
from lxml import etree
#-------------------------------------------------------------------------------
#Ignore all this information
def xmlSplitter(data,separator=lambda x: x.startswith('<?xml')):
    """Split a stream of concatenated XML documents into single documents.

    ``data`` is any iterable of lines.  Every line for which ``separator``
    returns true starts a new document; the lines collected so far are
    joined and yielded as one string.

    Nothing is yielded for an empty input, so callers never receive an
    empty string in place of a document.
    """
    buff = []
    for line in data:
        # A separator line closes the document collected so far (if any).
        if separator(line) and buff:
            yield ''.join(buff)
            buff = []
        buff.append(line)
    # Flush the trailing document, but only if something was collected.
    if buff:
        yield ''.join(buff)
def first(seq,default=None):
    """Return the first element of ``seq``, or ``default`` if ``seq`` is empty."""
    # next() with a fallback covers both the "has items" and "empty" cases.
    return next(iter(seq), default)
#-------------------------------------------------------------------------------
#This is where you change the internet source file- Use the file extensions from the sheet provided.
datasrc = "http://storage.googleapis.com/patents/grant_full_text/2003/pg030107.zip"
# Example of a later archive URL:
#http://commondatastorage.googleapis.com/patents/grant_full_text/2012/ipg120117.zip
# The local file name is the last path component of the URL.
filename = datasrc.split('/')[-1]
#-------------------------------------------------------------------------------
# Download the archive only if it is not already in the working directory.
if not os.path.exists(filename):
    # Fetch the whole payload BEFORE creating the local file, so a failed
    # download cannot leave an empty file that later runs would mistake
    # for a finished download.
    r = urllib2.urlopen(datasrc)
    try:
        payload = r.read()
    finally:
        r.close()
    with open(filename,'wb') as file_write:
        file_write.write(payload)
zf = zipfile.ZipFile(filename)
# Pick the first archive member with an .xml extension (any letter case).
xml_file = first([ x for x in zf.namelist() if x.lower().endswith('.xml')])
if xml_file is None:
    # Same exception type as the former bare assert, but it survives
    # "python -O" and reports what the archive actually contains.
    raise AssertionError(
        "No .xml member found in %s; archive contains: %s"
        % (filename, ", ".join(zf.namelist())))
#-------------------------------------------------------------------------------
#output set your folder location here, keep double \\ between
outFolder = "C:\\PatentFiles\\"
# Create the output folder when it is missing; open() would otherwise fail.
if not os.path.isdir(outFolder):
    os.makedirs(outFolder)
# Base name of the downloaded archive without its extension.
outFilename = os.path.splitext(filename)[0]
#-------------------------------------------------------------------------------
# One pipe-delimited text file per kind of record.
output = outFolder + outFilename + "_general.txt"
output2 = outFolder + outFilename + "_USCL.txt"
output3 = outFolder + outFilename + "_citation.txt"
output4 = outFolder + outFilename + "_inventor.txt"
# Open the four output files for writing (existing files are overwritten).
outFile = open(output, "w")
outFile2 = open(output2, "w")
outFile3 = open(output3, "w")
outFile4 = open(output4, "w")
# Write the column headers.
outFile.write("Patent No.|GrantDate|Application Date|Number of Claims|Examiners|US Primary Main Classification|Assignee|Assignee Address City_State_Country|First Inventor|First Inventor Address City_State_Country| \n")
outFile2.write("Patent No.|Primary|U.S Classification| \n")
outFile3.write("Patent No.|Citation|Citation Date|Who Cited This| \n")
outFile4.write("Patent No.|Inventor Last Name|First Name|City|State|Country|Nationality Country|Residence Country|\n")
#-------------------------------------------------------------------------------
#Here is the count- adjust this each time you run the program for the first time.
#Run at 10 for the 1st run then 5500 afterward.
count = 0

# Kind codes that end a cited document id; each "~<kind>~" becomes a
# temporary "$@" marker that is turned into a pipe later on.
_CITATION_KIND_CODES = ("A", "S", "S1", "B1", "B2", "A1", "H", "E")

# Inventor rows with no state value: a "None" placeholder is inserted in front
# of these country codes so every row keeps the same number of columns.
# "omitted" layout (comment in the original: files after 2009).
_NO_STATE_OMITTED_CODES = (
    "KR", "GB", "IT", "JP", "FR", "BR", "NO", "HK", "CA", "TW", "SE", "CH",
    "DE", "SG", "IN", "IL", "CN", "FI", "ZA", "NL", "AT", "AU", "BE", "CZ",
    "RU", "IE", "AR", "MY", "SK", "ES", "NZ", "HU", "UA", "DK", "TH", "MX",
)
# Repeated-country layout (comment in the original: 2005-2008 files).
_NO_STATE_TRIPLE_CODES = (
    "NO", "NZ", "RU", "RO", "SE", "SG", "SI", "TH", "TR", "TW", "VE", "ZA",
    "AN", "AR", "BA", "PH", "HR", "LT", "EE", "BJ", "CR", "PL", "CO", "UA",
    "KW", "CL", "CY", "LI", "SA",
)
# Country codes after which a new inventor row has to be started.
_ROW_END_OMITTED_CODES = (
    "US", "FR", "DK", "KR", "JP", "GB", "IT", "CH", "SG", "DE", "IN", "TW",
    "CN",
)
_ROW_END_TRIPLE_CODES = (
    "AT", "AN", "AR", "AU", "AZ", "BA", "BE", "BR", "BS", "CA", "CH", "CN",
)

# The try/finally guarantees the four output files are closed even when a
# document fails to parse half-way through the archive.
try:
    for item in xmlSplitter(zf.open(xml_file)):
        count += 1
        # Only the first 10 documents are processed; raise the limit
        # (the original note suggests 5500) for a full run.
        if count > 10:
            break
        doc = etree.XML(item)
        #-----------------------------------------------------------------------
        # General information file.
        docID = "~".join(doc.xpath('//publication-reference/document-id/country/text()|//publication-reference/document-id/doc-number/text()'))
        docID = docID.replace("D0","D")
        docID = docID.replace("H000","H")
        # Applied twice on purpose: each pass strips one zero after "PP".
        docID = docID.replace("PP0","PP")
        docID = docID.replace("PP0","PP")
        docID = docID.replace("RE0","RE")
        docID = docID.replace("~0","~")
        docID = docID.replace("US~","")
        # Separator that ends the current output row and starts a new row
        # for the same patent number.
        row_break = "|" + "\n" + str(docID) + "|"
        grantdate = first(doc.xpath('//publication-reference/document-id/date/text()'))
        applicationdate = first(doc.xpath('//application-reference/document-id/date/text()'))
        claimsNum = first(doc.xpath('//number-of-claims/text()'))
        assignee1 = "-".join(doc.xpath('//assignees/assignee/addressbook/orgname/text()|//assignees/assignee/addressbook/last-name/text()|//assignees/assignee/addressbook/first-name/text()'))
        assignee1 = assignee1.replace('-',', ')
        assignee2 = "_".join(doc.xpath('//assignee/addressbook/address/*/text()'))
        assignees = str(assignee1.encode("UTF-8")) + "|" + str(assignee2.encode("UTF-8"))
        inventors1 = first(doc.xpath('//applicants/applicant/addressbook/last-name/text()'))
        inventor2 = first(doc.xpath('//applicants/applicant/addressbook/first-name/text()'))
        inventor3 = first(doc.xpath('//applicants/applicant/addressbook/address/city/text()'))
        inventor4 = first(doc.xpath('//applicants/applicant/addressbook/address/state/text()'))
        inventor5 = first(doc.xpath('//applicants/applicant/addressbook/address/country/text()'))
        inventor = str(inventor2.encode("UTF-8") if inventor2 else inventor2) + " " + str(inventors1.encode("UTF-8") if inventors1 else inventors1)
        inventors2 = str(inventor3.encode("UTF-8") if inventor3 else inventor3) + "_" + str(inventor4) + "_" + str(inventor5)
        inventors = str(inventor) + "|" + str(inventors2)
        examiners = "~".join(doc.xpath('//examiners/primary-examiner/first-name/text()|//examiners/primary-examiner/last-name/text()'))
        examiners = examiners.replace("~",", ")
        uscl1 = first(doc.xpath('//classification-national/main-classification/text()'))
        #-----------------------------------------------------------------------
        # USCL (classification) file.
        # An empty default keeps a document without a publication country
        # from crashing on None.replace(); other fields tolerate missing
        # values in the same way.
        pubCountry = first(doc.xpath('//publication-reference/document-id/country/text()'), "")
        notprimary = pubCountry.replace("US","0")
        primary1 = pubCountry.replace("US","1")
        uscl2 = "~".join(doc.xpath('//us-bibliographic-data-grant/classification-national/*/text()|//sequence-cwu/publication-reference/document-id/country/text()'))
        # The leading "US~" becomes the primary flag; every further "~"
        # starts a new row carrying the non-primary flag.
        uscl2 = uscl2.replace("US~", str(primary1) + "|")
        uscl2 = uscl2.replace("~", row_break + str(notprimary) + "|")
        uscl2 = uscl2.replace("US", "|")
        #-----------------------------------------------------------------------
        # Citation file.
        citation = '~'.join(doc.xpath('//publication-reference/document-id/country/text()|//references-cited/citation/patcit/document-id/country/text()|//references-cited/citation/patcit/document-id/doc-number/text()|//references-cited/citation/patcit/document-id/kind/text()|//references-cited/citation/patcit/document-id/date/text()|//references-cited/citation/category/text()'))
        # Mark the kind codes; the "$@" markers become pipes further down.
        for kind in _CITATION_KIND_CODES:
            citation = citation.replace("~" + kind + "~", "$@")
        # One-off fixes for document numbers that embed "US" or "DE".
        # The "$" and "D!" placeholders are restored at the very end.
        citation = citation.replace("05225US~", "05225U$|" )
        citation = citation.replace("063106 DE", "063106D!" )
        citation = citation.replace("US~US~", "US~" )
        citation = citation.replace("PCT/US", "PCT/U$")
        citation = citation.replace("PCTUS", "PCTU$")
        citation = citation.replace("WO US", "WO U$")
        citation = citation.replace("WO~US", "WO~ U$")
        # Fixes for citations that would otherwise lack pipes.
        citation = citation.replace("US~cited by examiner", "||cited by examiner" )
        citation = citation.replace("US~cited by other", "||cited by other" )
        # Put every citation on its own row.  The order of the statements
        # below matters and is kept exactly as in the original script.
        # To support a country that shows up alone in a column, add a line
        # such as: citation = citation.replace("KR~", "Foreign -KR-" )
        citation = citation.replace("$@", "|")
        citation = citation.replace("~US~", row_break)
        citation = citation.replace("US~", "")
        citation = citation.replace("~JP~", row_break + "Foreign -JP-")
        citation = citation.replace("JP~", "Foreign -JP-" )
        citation = citation.replace("~GB~", row_break + "Foreign -GB-")
        citation = citation.replace("GB~", "Foreign -GB-" )
        citation = citation.replace("~WO~", row_break + "Foreign -WO-")
        citation = citation.replace("WO~", "Foreign -WO-" )
        citation = citation.replace("~CA~", row_break + "Foreign -CA-")
        citation = citation.replace("~DE~EP~", "~DE~ EP-" )
        citation = citation.replace("~DE~", row_break + "Foreign -DE-")
        citation = citation.replace("DE~", "Foreign -DE-" )
        citation = citation.replace("~KR~", row_break + "Foreign -KR-")
        citation = citation.replace("KR~", "Foreign -KR-" )
        citation = citation.replace("~EM~", row_break + "Foreign -EM-")
        citation = citation.replace("~CH~", row_break + "Foreign -CH-")
        citation = citation.replace("~DE~", row_break + "Foreign -DE-")
        citation = citation.replace("~SE~", row_break + "Foreign -SE-")
        citation = citation.replace("~FR~", row_break + "Foreign -FR-")
        citation = citation.replace("~FR~EP~", "~FR~ EP-" )
        citation = citation.replace("FR~", "Foreign -FR-" )
        citation = citation.replace("~CN~", row_break + "Foreign -CN-")
        citation = citation.replace("~TW~", row_break + "Foreign -TW-")
        citation = citation.replace("~TW", row_break + "Foreign -TW-")
        citation = citation.replace("TW~", "Foreign -TW-" )
        citation = citation.replace("~NL~", row_break + "Foreign -NL-")
        citation = citation.replace("~BR~", row_break + "Foreign -BR-")
        citation = citation.replace("~AU~", row_break + "Foreign -AU-")
        citation = citation.replace("~ES~", row_break + "Foreign -ES-")
        citation = citation.replace("~IT~", row_break + "Foreign -IT-")
        citation = citation.replace("~SU~", row_break + "Foreign -SU-")
        citation = citation.replace("~AT~", row_break + "Foreign -AT-")
        citation = citation.replace("~BE~", row_break + "Foreign -BE-")
        citation = citation.replace("~DK~", row_break + "Foreign -DK-")
        citation = citation.replace("~RU~", row_break + "Foreign -RU-")
        citation = citation.replace("RU~", "Foreign -RU-" )
        # End-of-citation fixes: collapse runs of "cited by ..." categories
        # into one value.  The passes are repeated on purpose; keep the order.
        citation = citation.replace("cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by other~cited by other", "cited by examiner" )
        citation = citation.replace("cited by other~cited by examiner~cited by examiner", "cited by other" )
        citation = citation.replace("cited by other~cited by other~cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by examiner~cited by examiner~cited by examiner", "cited by examiner" )
        citation = citation.replace("cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by examiner", "cited by examiner" )
        citation = citation.replace("cited by other~cited by examiner", "cited by other" )
        citation = citation.replace("cited by examiner~cited by other", "cited by examiner" )
        citation = citation.replace("cited by examiner~cited by other~cited by other", "cited by examiner" )
        citation = citation.replace("cited by other~cited by examiner~cited by examiner", "cited by other" )
        citation = citation.replace("cited by other~cited by other~cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by examiner~cited by examiner~cited by examiner", "cited by examiner" )
        citation = citation.replace("cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by examiner", "cited by examiner" )
        citation = citation.replace("cited by other~cited by examiner", "cited by other" )
        citation = citation.replace("cited by examiner~cited by other", "cited by examiner" )
        citation = citation.replace("cited by examiner~cited by other~cited by other", "cited by examiner" )
        citation = citation.replace("cited by other~cited by examiner~cited by examiner", "cited by other" )
        citation = citation.replace("cited by other~cited by other~cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by examiner~cited by examiner~cited by examiner", "cited by examiner" )
        citation = citation.replace("cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by examiner", "cited by examiner" )
        citation = citation.replace("cited by other~cited by examiner", "cited by other" )
        citation = citation.replace("cited by examiner~cited by other", "cited by examiner" )
        citation = citation.replace("cited by other~cited by other", "cited by other" )
        citation = citation.replace("cited by examiner~cited by examiner", "cited by examiner" )
        citation = citation.replace("cited by other~cited by examiner", "cited by other" )
        citation = citation.replace("cited by examiner~cited by other", "cited by examiner" )
        citation = citation.replace("~", "|" )
        citation = citation.replace("US", "||")
        # Post-processing fixes, including restoring the placeholders.
        citation = citation.replace("CA|", "Foreign -CA-" )
        citation = citation.replace("EP|", "Foreign -EP-" )
        citation = citation.replace("CN|", "Foreign -CN-" )
        citation = citation.replace("$", "S")
        citation = citation.replace("D!", "DE")
        #-----------------------------------------------------------------------
        # Inventors file.
        inventor1 = doc.xpath('//applicants/applicant/addressbook/last-name/text()|//applicants/applicant/addressbook/first-name/text()|//applicants/applicant/addressbook/address/city/text()|//applicants/applicant/addressbook/address/state/text()|//applicants/applicant/addressbook/address/country/text()|//applicants/applicant/nationality/*/text()|//applicants/applicant/residence/*/text()|//sequence-cwu/publication-reference/document-id/country/text()|//sequence-cwu/number/text()')
        inventor1 = '~'.join(inventor1).replace('\n-','')
        # Insert a "None" state column where the state value is missing.
        for code in _NO_STATE_OMITTED_CODES:
            inventor1 = inventor1.replace('~' + code + '~omitted', '~None~' + code + '~omitted')
        for code in _NO_STATE_TRIPLE_CODES:
            triple = '~' + code + '~' + code + '~' + code
            inventor1 = inventor1.replace(triple, '~None' + triple)
        # Start a new row after the last column of each inventor.
        for code in _ROW_END_OMITTED_CODES:
            inventor1 = inventor1.replace('omitted~' + code + '~', 'omitted~' + code + row_break)
        for code in _ROW_END_TRIPLE_CODES:
            triple = code + '~' + code + '~' + code
            inventor1 = inventor1.replace(triple + '~', triple + row_break)
        # Special-case fixes for names that would otherwise produce the
        # wrong number of columns.
        inventor1 = inventor1.replace('~None~None~NO~','~None~NO~')
        inventor1 = inventor1.replace('Ramandeep~Chandigarh','Ramandeep|None~Chandigarh')
        inventor1 = inventor1.replace('Esk~eh~r','Eskehr')
        inventor1 = inventor1.replace('Baychar~Eastport','Baychar~None~Eastport')
        inventor1 = inventor1.replace('US~1', '||||||')
        inventor1 = inventor1.replace('~','|')
        #-----------------------------------------------------------------------
        # Console preview.  Swap in one of the commented lines to preview a
        # different file.  A parenthesised single argument prints the same
        # text on Python 2.
        print("DocID: {0}\nGrantDate: {1}\nApplicationDate: {2}\nNumber of Claims: {3}\nExaminers: {4}\nAssignee: {5}\nInventor: {6}\nUS Cl.: {7}\n".format(docID,grantdate,applicationdate,claimsNum,examiners.encode("UTF-8"),assignees,inventors,uscl1))
        #print("DocID: {0}\nU.S Cl: {1}\nPrimary: {2}\n".format(docID,uscl2,primary1))
        #print("DocID: {0}\nCitation: {1}\n".format(docID,citation.encode("UTF-8")))
        # NOTE(review): the line below uses appID, which this script never defines.
        #print("DocID: {0}\nTitle: {1}\nInventors: {2}\n".format(docID,appID,inventor1.encode("UTF-8")))
        #-----------------------------------------------------------------------
        # Write one record per output file.
        outFile.write(str(docID) +"|"+ str(grantdate) +"|"+ str(applicationdate) + "|"+ str(claimsNum) + "|"+ str(examiners.encode("UTF-8")) + "|"+ str(uscl1) + "|"+ str(assignees) + "|"+ str(inventors) +"|"+"\n")
        # Encode to UTF-8 before writing, as the inventor file already does;
        # str() on non-ASCII unicode text raises UnicodeEncodeError on Python 2.
        outFile2.write(str(docID) +"|"+ str(uscl2.encode("UTF-8")) +"|"+ "\n")
        outFile3.write(str(docID) +"|"+ str(citation.encode("UTF-8")) +"|"+"\n")
        outFile4.write(str(docID) + "|"+ str(inventor1.encode("UTF-8")) + "|" +"\n")
finally:
    outFile.close()
    outFile2.close()
    outFile3.close()
    outFile4.close()
print("output files complete")