"""Interactively correct the records of a tab-separated names file.

Each record of OLDNAMES is expected to hold six tab-separated fields:
first-last name, last-first name, date, birthplace, residences and
alternate names.  Residences and alternate names are '|'-separated
lists.  For every record a small tkinter form lets the user fix the
date and the two lists; the corrected record is written to NEWNAMES.
"""
from tkinter import *
import re

# Session statistics reported by write_and_exit().
linesIn = 0
LinesOut = 0
linesNow = 0

OLDNAMES = "names.tsv"
NEWNAMES = "cnames.tsv"

# Grid positions of the editable fields.
DATEROW = 3
DATECOL = 1
RESROW = 5
RESCOL = 1
ALTROW = 7
ALTCOL = 1


def open_file(fl, mode):
    """Open *fl* in *mode* with latin1 encoding.

    Returns the open file object, or None (after printing a message)
    when the file cannot be opened.
    """
    try:
        return open(fl, mode, encoding='latin1')
    except OSError:
        print(f"OSError: Cannot open file {fl} in mode {mode}.")
    except Exception:
        # e.g. ValueError for an invalid mode string.  A bare except would
        # also swallow KeyboardInterrupt / SystemExit.
        print(f"Other error: Cannot open file {fl} in mode {mode}.")
    return None
# end of open_file


def manually_repair_line(root, line):
    """Let the user edit a whole raw *line* and return the edited text."""
    eNewLine = Entry(root, width=80)
    eNewLine.insert(0, line)
    eNewLine.grid(row=9, column=0, sticky=W)
    # Wait for the user to finish editing BEFORE reading the entry.
    my_button(root, 'OK', 10, 1)
    newLine = eNewLine.get()
    eNewLine.destroy()
    return newLine
# end manually_repair_line


def valid_date(date):
    """Return True when *date* looks like a year or a year range.

    An optional leading "fl" (flourished) is ignored.  The text must
    start with an optional 'c' followed by a four-digit year beginning
    with 1 or 2, and its last four characters must also be such a year
    (e.g. "1850", "c1850", "1850-1900", "c1850-c1900").
    """
    # Strip off a leading "flourish" marker.  Slicing removes only the
    # prefix; str.strip('fl ') would strip a character set from both ends.
    if date[0:2] == 'fl':
        date = date[2:].strip()
    # Check for valid date format.
    if re.search(r'^c?[1-2][0-9][0-9][0-9]-?c?', date) is None:
        return False
    # Check last 4 digits of date.
    if not re.search(r'[1-2][0-9][0-9][0-9]', date[-4:]):
        return False
    return True
# end valid_date


def yes_or_no(root, txt, default, myrow, mycolumn):
    """Ask a yes/no question and return the first letter of the reply.

    The prompt, a one-character entry pre-filled with *default* and a
    confirmation button are placed on row *myrow*, starting two columns
    to the right of *mycolumn*.  The reply is lower-cased; an empty
    reply falls back to *default*.
    """
    lAnswer = Label(root, text=f"{txt} (y/n):")
    lAnswer.grid(row=myrow, column=mycolumn + 2, sticky=W)
    eAnswer = Entry(root, width=1)
    eAnswer.insert(0, default)
    eAnswer.grid(row=myrow, column=mycolumn + 3, sticky=W)
    my_button(root, txt, myrow, mycolumn + 4)
    reply = eAnswer.get().strip().lower()
    # Indexing [0] on an empty reply would raise IndexError.
    answer = reply[0] if reply else str(default).lower()[:1]
    lAnswer.destroy()
    eAnswer.destroy()
    return answer
# end yes_or_no


def my_button(root, txt, myrow, mycolumn):
    """Show a button labelled *txt* and block until it is pressed.

    The button is removed again before returning.
    """
    button_pressed = StringVar()
    button = Button(root, text=txt,
                    command=lambda: button_pressed.set("button pressed"))
    button.grid(row=myrow, column=mycolumn)
    # Runs a local event loop until the variable is written by the callback.
    button.wait_variable(button_pressed)
    button.destroy()
# end my_button


def get_date(root, dtIn):
    """Prompt for a date until valid_date() accepts it; return the text.

    *dtIn* is the initial content of the entry.  After a rejected
    attempt the entry keeps what the user typed so it can be corrected.
    """
    eDate = Entry(root, width=20)
    eDate.insert(0, dtIn)
    eDate.grid(row=DATEROW, column=DATECOL)
    while True:
        my_button(root, "OK", DATEROW, DATECOL + 1)
        # Read only after OK has been pressed.
        date = eDate.get()
        if valid_date(date):
            break
    eDate.destroy()
    return date
# end get_date


def _ask_item(root, initial, myrow, mycolumn, mywidth):
    """Show one entry pre-filled with *initial*; return the text after OK."""
    eItem = Entry(root, width=mywidth)
    eItem.insert(0, initial)
    eItem.grid(row=myrow, column=mycolumn, sticky=W)
    my_button(root, "OK", myrow, mycolumn + 1)
    item = eItem.get()
    eItem.destroy()
    return item


def get_list(root, stIn, myrow, mycolumn, mywidth):
    """Edit a '|'-separated list and return the corrected list string.

    Every existing item of *stIn* is offered for editing (an item that
    is cleared is dropped), then further items may be added.  A
    non-empty result is shown for confirmation; answering 'n' starts
    the whole edit over from *stIn*.
    """
    while True:
        items = []
        # Any reasonable name would surely have more than 3 characters?
        if len(stIn) > 3:
            # See what we have already got.
            for item in stIn.split('|'):
                edited = _ask_item(root, item, myrow, mycolumn, mywidth)
                if edited != "":
                    items.append(edited)
        while True:
            answer = yes_or_no(root, "Another item", 'n', myrow, mycolumn + 1)
            if answer == 'n':
                break
            if answer != 'y':
                continue
            newItem = _ask_item(root, "", myrow, mycolumn, mywidth)
            if newItem != "":
                items.append(newItem)
        stOut = '|'.join(items).strip('|')
        if stOut == "":
            return stOut
        # Show the assembled result (not the input) for confirmation.
        # Might need to adjust font, here, to avoid wrap.
        lAll = Label(root, text=stOut)
        lAll.grid(row=9, column=0, sticky=W)
        answer = yes_or_no(root, "Result OK?", 'y', 10, 0)
        lAll.destroy()
        if answer != 'n':
            return stOut
        # Answered 'n': start over.
# end get_list


def convert_record(root, line):
    """Interactively correct one record and return the new line.

    *line* must hold six tab-separated fields; otherwise the user is
    asked to repair the raw line by hand and that text is returned.
    The returned line has no trailing newline.
    """
    arRec = line.split('\t')
    if len(arRec) != 6:
        lMangled = Label(root, text=f"{line}: Record mangled. Repair manually.")
        lMangled.grid(row=6, column=0)
        newline = manually_repair_line(root, line)
        lMangled.destroy()
        return newline

    firstlast = arRec[0]
    lastfirst = arRec[1]
    birthplace = arRec[3]

    # Show which name we are working with
    lName = Label(root, text=firstlast)
    lName.grid(row=1, column=0, padx=10, pady=10)
    # and current values.
    # NOTE(review): these labels share their grid cells with the edit
    # entries created below, so an entry covers its label while it is
    # shown -- confirm the intended layout.
    lDate = Label(root, text=arRec[2])
    lDate.grid(row=DATEROW, column=DATECOL, padx=10, pady=10)
    lRes = Label(root, text=arRec[4])
    lRes.grid(row=RESROW, column=RESCOL, padx=10, pady=10)
    lAlt = Label(root, text=arRec[5])
    lAlt.grid(row=ALTROW, column=ALTCOL, padx=10, pady=10)

    # get_date() only returns once the date is valid, hence never empty.
    date = get_date(root, arRec[2])
    residences = get_list(root, arRec[4], RESROW, RESCOL, 20)
    alternates = get_list(root, arRec[5], ALTROW, ALTCOL, 40)

    newline = '\t'.join(
        [firstlast, lastfirst, date, birthplace, residences, alternates])

    for widget in (lName, lDate, lRes, lAlt):
        widget.destroy()
    return newline
# end convert_record


def write_and_exit(flOldNames=None, flNewNames=None):
    """Close the given files, print the session statistics and exit.

    The number of lines written is counted by re-reading NEWNAMES,
    because a file opened for writing cannot be iterated.
    """
    global LinesOut
    for fl in (flOldNames, flNewNames):
        if fl is not None:
            fl.close()
    print(f"{linesIn} total lines in")
    LinesOut = 0
    flCheck = open_file(NEWNAMES, "r")
    if flCheck:
        with flCheck:
            LinesOut = sum(1 for _ in flCheck)
    print(f"{LinesOut} total lines Out")
    print(f"{linesNow} lines this session")
    raise SystemExit(0)
# end write_and_exit


def main():
    """Run the correction session over OLDNAMES, writing NEWNAMES.

    Returns False when either file cannot be opened; otherwise the
    program ends through write_and_exit().
    """
    global linesIn, linesNow

    flOldNames = open_file(OLDNAMES, "r")
    if not flOldNames:
        print(f"Cannot open {OLDNAMES} file for reading.")
        return False
    # The input is read in one go, so it can be closed immediately.
    with flOldNames:
        stOldNames = flOldNames.read()

    flNewNames = open_file(NEWNAMES, "w")
    if not flNewNames:
        print(f"Cannot open {NEWNAMES} file for writing.")
        return False

    # split('\n') rather than splitlines(): the data is latin1 and
    # splitlines() would also break on characters such as '\x85'.
    arLinesIn = stOldNames.split('\n')
    # A file ending in '\n' yields one empty trailing element.
    if arLinesIn and arLinesIn[-1] == '':
        arLinesIn.pop()
    linesIn = len(arLinesIn)

    # A numeral on the last line says how many records are already done.
    starting_rec = 1
    if arLinesIn:
        try:
            starting_rec = max(int(arLinesIn[-1]), 0)
        except ValueError:
            # no numeral at the end of the file
            starting_rec = 1
        else:
            # The counter line itself is not a record.
            arLinesIn.pop()

    with flNewNames:
        # Skip lines already done, writing these out without processing.
        for line in arLinesIn[:starting_rec]:
            flNewNames.write(line + '\n')

        # Start at first line not done.
        root = Tk()
        root.option_add("*Font", "Times 18")
        root.update()
        # Set the window size.
        root.geometry('800x500')
        root.title("Correcting Names File")

        # Put in field labels, here, in column 0.
        Label(root, text="Date: ").grid(
            column=0, row=DATEROW, padx=10, pady=10)
        Label(root, text="Residences: ").grid(
            column=0, row=RESROW, padx=10, pady=10)
        Label(root, text="Names: ").grid(
            column=0, row=ALTROW, padx=10, pady=10)

        for line in arLinesIn[starting_rec:]:
            if line == '':
                continue
            # Create a corrected new line
            newLine = convert_record(root, line)
            # and write it out, one record per line.
            flNewNames.write(newLine + '\n')
            linesNow += 1

        root.destroy()

    write_and_exit(flOldNames, flNewNames)
# end main


if __name__ == "__main__":
    main()