PDF データリーダーに問題があります。空のセルがあると、その次のセルの値が空のセルの値として使われてしまいます。
例: ある生徒に 3 つの科目の成績があります。科目 1: 成績 "A"、科目 2: 成績 ""(空)、科目 3: 成績 "C"。
科目 2 には成績がなく空のはずですが、科目 3 の値「C」が科目 2 の値として読み取られてしまいます。
どうすればこの問題を修正できますか?
助けていただけると幸いです。
import tabula
def _store_student_row(row, subject_codes, student_subject_grade):
    """Record one student row into ``student_subject_grade``.

    Cell 0 is taken as the registration number, cell 1 as the student name
    and every later cell as the grade for ``subject_codes[col - 2]``.
    Rows that share a registration number are merged into a single entry.

    Raises:
        IndexError: if the row has more grade cells than there are
            collected subject codes.
    """
    registration_number = None
    for col, cell in enumerate(row):
        text = cell['text']
        if col == 0:
            registration_number = text
            student_subject_grade.setdefault(registration_number, {})
        elif col == 1:
            student_subject_grade[registration_number]['name'] = text
        else:
            # Grades are matched to subject codes purely by cell position.
            student_subject_grade[registration_number][subject_codes[col - 2]] = text


def readpdf(pdf_path="/tmp/university_exam_results.pdf", tables=None):
    """Turn tabula JSON tables of exam results into per-subject grade records.

    Layout handled, per table:

    * Row 1 is a header row when its first cell is empty or its second cell
      is ``"Subject Code - >"``; the cells from the third one onwards are
      then the subject codes.  Otherwise row 1 is already a student row and
      the subject codes of the previous header stay in effect.
    * After a header row, row 2 is never stored as a student.  If its first
      cell is empty it continues the subject codes: each cell from the third
      one onwards is appended to the code in the same column.
    * When the codes were continued that way, row 3 is skipped as well.
    * Every other row is a student row: registration number, student name,
      then one grade per subject code.

    NOTE(review): grades are assigned by cell position only.  If the
    extractor leaves out an empty cell instead of emitting one whose text is
    ``''``, the following grades move one column to the left and this
    function cannot notice.  Extracting with different tabula options and
    passing the result through ``tables`` may help -- to be confirmed against
    the tabula-py documentation.

    Args:
        pdf_path: PDF file handed to ``tabula.read_pdf`` when ``tables`` is
            not supplied.
        tables: Optional already-extracted tabula JSON output: an iterable of
            dicts whose ``'data'`` entry is a list of rows, each row a list
            of cell dicts carrying a ``'text'`` key.  When given, no PDF is
            read.

    Returns:
        A list of ``(0, 0, values)`` tuples, one per non-empty grade, where
        ``values`` has the keys ``'registration_number'``, ``'student_name'``,
        ``'subject_code'`` and ``'grade'``.

    Raises:
        IndexError: if a row has more cells than the collected subject codes
            can account for.
    """
    if tables is None:
        tables = tabula.read_pdf(pdf_path, output_format="json", pages="all")

    student_subject_grade = {}
    # Deliberately kept across tables: tables without a header row reuse the
    # subject codes collected from the most recent header.
    subject_codes = []

    for entry in tables:
        pagebeginning = False
        subject_split = False
        for table_row, row in enumerate(entry['data'], 1):
            if table_row == 1:
                # Decide whether this is a header row before touching any
                # cell, so a header is never half-recorded as a student.
                pagebeginning = (
                    (len(row) > 0 and row[0]['text'] == '')
                    or (len(row) > 1 and row[1]['text'] == "Subject Code - >")
                )
                if pagebeginning:
                    subject_codes = [cell['text'] for cell in row[2:]]
                else:
                    _store_student_row(row, subject_codes, student_subject_grade)
            elif pagebeginning and table_row == 2:
                # An empty first cell marks the second half of subject codes
                # that were split over two lines; any other row here is
                # ignored (typically the 'Reg. Number' / 'Stud. Name' labels).
                subject_split = len(row) > 0 and row[0]['text'] == ''
                if subject_split:
                    for index, cell in enumerate(row[2:]):
                        subject_codes[index] += cell['text']
            elif pagebeginning and subject_split and table_row == 3:
                # With split codes the label row sits one line lower.
                continue
            else:
                _store_student_row(row, subject_codes, student_subject_grade)

    university_performance_ids = []
    for registration_number, details in student_subject_grade.items():
        # A row consisting of a single cell never received a name.
        student_name = details.get('name', '')
        for subject_code, grade in details.items():
            if subject_code == 'name' or grade == '':
                continue
            university_performance_ids.append((0, 0, {
                'registration_number': registration_number,
                'student_name': student_name,
                'subject_code': subject_code,
                'grade': grade,
            }))
    return university_performance_ids