""" The structure for a setting would be a complex list: [ [All fields of dft.tsv except piece, section, ensemble, part], sections [ sect1, [ ensemble, parts [ part1 [ inst1, inst2...], part2 [inst1, inst2...], ... ] ] sect2, [ (etc.) ] ... ] ] Should this be a dictionary instead? """ import json, re from operator import itemgetter LOCALBASE = "/mnt/d/website/" INSTFILE = LOCALBASE + "instruments.tsv" #DFTFILE = LOCALBASE + "design/" + "dft.tsv" DFTFILE = LOCALBASE + "design/" + "dft.tsv" #SETTINGSFILE = LOCALBASE + "settings.json" SETTINGSFILE = LOCALBASE + "design/" + "settings.json" ERRORFILE = LOCALBASE + "design/" + "slerr.txt" # we need this to get keys in proper order def getNumericPage(stPage, line): #remove spaces stPage = stPage.replace(' ', '') # Where is the erroneous key? if stPage == '': print('blank page') print(getKey(line)) return 'XXXXX' if stPage[0] in ['#', '%']: suffix = '' if stPage[-1].isalpha(): suffix = stPage[-1] stPage = stPage[:-1] stNum = stPage[1:].zfill(3) return stPage[0] + stNum + suffix # So it must begin with 'f.' or 'p.' 
# Lop these off stPage = stPage[2:] if stPage[0].isdigit(): val = re.search('([0-9][0-9]*)([v,a-n,x-z]*)$', stPage, re.DOTALL) # It's a numeric page number with a possible suffix, like 49vb if val: stNumber = val[1].zfill(3) suffix = val[2] return stNumber + suffix #It's a sig, like a4v, not a folio or page number else: return(stPage) # get key from dft line def getKey(line): # only need 1st part of source and document (before the ":") keyOut = [line[4].split(':')[0], line[5].split(':')[0], line[6], line[8]] return keyOut def get_section_and_part(line): data = [line[16], line[21]] return data #Get data common to all parts in a setting def getCommon(line): [title,subtitle,composer,orig_comp,source,document,volume,date,page,editor,encoder,arranger,intabulator,contributor,concordances,piece,section,mType,mKey,difficulty,ensemble,part,remarks,recording,facsimile,fronimo,PDF,midi,created,modified] = line #Lop off line feed character at end of line modified= modified[:-1] common = [title,subtitle,composer,orig_comp,source,document,volume,date,page,mType,mKey,difficulty,ensemble,remarks,recording,facsimile,fronimo,PDF,midi,created,modified] return(common) # 1st step of building a new setting # get data common to all sections and parts def addCommon(line, setting): setting.append(getCommon(line)) return(setting) # Next step: add section and part data from current line. 
def addTo(line, setting):
    """Append the row's [section, label] pair to setting and return it.

    The label is the row's part, or its ensemble when the part is empty.

    Raises:
        ValueError: if the row does not have exactly 30 fields.
    """
    [title, subtitle, composer, orig_comp, source, document, volume, date,
     page, editor, encoder, arranger, intabulator, contributor, concordances,
     piece, section, mType, mKey, difficulty, ensemble, part, remarks,
     recording, facsimile, fronimo, PDF, midi, created, modified] = line
    label = ensemble if part == "" else part
    setting.append([section, label])
    return setting
# addTo


#Put instrument lists in proper order
def reorder(setting):
    """Placeholder: currently returns setting unchanged."""
    # do something
    return setting


#This would use data in instOrder to put ensemble and parts in order
#Then complete setting record
def finish(setting):
    """Finalize a setting record; at present this only calls reorder()."""
    setting = reorder(setting)
    # complete setting record, like maybe adding a ']'?
    return setting


#Write out liSettings as json file, using json.dumps() or other method
def writeSettings(liSettings, flSettings):
    """Serialize liSettings as JSON (indent=2) to the open file flSettings."""
    json.dump(liSettings, flSettings, indent=2)


def removeZeros(stPage, line):
    """Strip the leading zeros that were added to a page for sorting.

    Args:
        stPage: normalized page string (optionally prefixed by '%' or '#'),
            or None.
        line: the dft row, printed when the page is empty, '0' or None.

    Returns:
        The page without leading zeros, keeping any '%'/'#' prefix.  An
        all-zero number collapses to '0'.  A prefixed page whose remainder is
        empty or '0' becomes prefix + '999'.  An empty, '0' or None page is
        returned unchanged after printing the row and waiting for Enter.
    """
    if stPage is None or stPage == '' or stPage == '0':
        print(line)
        input()
        return stPage

    stPrefix = ""
    if stPage[0] == '%' or stPage[0] == '#':
        stPrefix = stPage[0]
        stPage = stPage[1:]
    # stPage should not be empty or equal to "0"
    if stPage == '' or stPage == '0':
        stPage = '999'

    # lstrip cannot run off the end of an all-zero string the way indexing
    # stPage[0] in a loop did (e.g. '#000' used to raise IndexError).
    stPage = stPage.lstrip('0')
    if stPage == '':
        stPage = '0'
    return stPrefix + stPage


def _build_settings(liDft, flError):
    """Group sorted dft rows into settings; log duplicate section/part pairs.

    Consecutive rows with the same getKey() value belong to one setting.  A
    row whose [section, part] pair already occurred in the current setting has
    its fronimo file name (field 25, characters 30 up to the last 4) written
    to flError; the row is still added to the setting.
    """
    liSettings = []
    setting = None
    lastKey = None
    seen = set()
    for dftLine in liDft:
        currKey = getKey(dftLine)
        if setting is None or currKey != lastKey:
            if setting is not None:
                # Add the completed setting to list of settings
                liSettings.append(finish(setting))
            # Starting work on next setting:
            # add items common to all parts in a new setting
            setting = addCommon(dftLine, [])
            lastKey = currKey
            seen = set()

        # need a new section name or a new part name
        section_and_part = tuple(get_section_and_part(dftLine))
        if section_and_part in seen:
            ft3File = dftLine[25]
            ft3File = ft3File[30:]
            ft3File = ft3File[:-4]
            print(ft3File, file=flError)
        seen.add(section_and_part)

        # add data for a new section or part
        setting = addTo(dftLine, setting)

    # get the last setting (finished like every other one)
    if setting is not None:
        liSettings.append(finish(setting))
    return liSettings


def main():
    """Read dft.tsv, group its rows into settings and write settings.json.

    Also writes a debugging dump of the sorted rows to 'dft.sort' in the
    current directory and a list of duplicated parts to ERRORFILE.  The
    settings file is opened only once the data is ready, so a failed run does
    not leave a truncated output file behind.
    """
    # get list of instruments (which are in proper order for display)
    # Not consumed yet: reorder() is still a stub.
    instOrder = {}
    with open(INSTFILE) as file:
        for n, line in enumerate(file):
            instOrder[line.split('\t')[0]] = n

    # read dft.tsv lines into an array, liDft
    with open(DFTFILE, "r", encoding='latin1') as flDft:
        liDft = [line.split("\t") for line in flDft]

    #Remove header line (an empty file has none)
    if liDft:
        liDft.pop(0)

    #normalize numeric page designations to 3 digits for sorting
    for line in liDft:
        stPage = getNumericPage(line[8], line)
        if stPage is None:
            print('af getNumericPage; stPage = None; line[8] = ', line[8])
            print(line)
            input()
        line[8] = stPage

    #sort by source,document,volume,page,section,part
    # NOTE: a None page left by the step above can make this comparison
    # raise TypeError if it has to be compared with a string page.
    liDft.sort(key=itemgetter(4, 5, 6, 8, 16, 21))

    #Remove leading zeros of page #s
    for line in liDft:
        line[8] = removeZeros(line[8], line)

    # for debugging only - print out parts of sorted dft file
    with open('dft.sort', 'w', encoding='latin1') as flDebug:
        for line in liDft:
            [title, subtitle, composer, orig_comp, source, document, volume,
             date, page, editor, encoder, arranger, intabulator, contributor,
             concordances, piece, section, mType, mKey, difficulty, ensemble,
             part, remarks, recording, facsimile, fronimo, PDF, midi, created,
             modified] = line
            print(getKey(line), file=flDebug)
            print(fronimo[22:], file=flDebug)
            stPrint = "Section=\"" + section + "\";part=\"" + part + "\";ensemble=\"" + ensemble + "\""
            print(stPrint, file=flDebug)
            print("-----------------------------", file=flDebug)

    #Walk through sorted list of parts
    with open(ERRORFILE, "w", encoding='latin1') as flError:
        liSettings = _build_settings(liDft, flError)

    # Write settings list to a json file
    with open(SETTINGSFILE, "w", encoding='latin1') as flSettings:
        writeSettings(liSettings, flSettings)
# end main


if __name__ == "__main__":
    main()