"""Build a nested list of "settings" from the tab-separated dft export.

The script reads DFTFILE (one record per part) and INSTFILE (instrument
display order), groups the records into settings -> sections -> parts, and
prints the result to stdout as JSON.
"""
import json
import os
import platform
import re
import subprocess
from operator import itemgetter

currOS = platform.platform()
if currOS.startswith('Windows'):
    LOCALBASE = 'D:/website/'
    TEMPFILE = 'C:/TEMP/programs/slTemp.json'
else:
    LOCALBASE = '/mnt/d/website/'
    # NOTE(review): "s1Temp" (digit one) differs from the Windows name
    # "slTemp" (letter ell) -- looks like a typo; confirm before changing.
    TEMPFILE = '/mnt/c/TEMP/programs/s1Temp.json'

INSTFILE = LOCALBASE + "insts.tsv"
DFTFILE = LOCALBASE + "dft.tsv"
SETTINGSFILE = LOCALBASE + "settings.json"
SEDFILE = '~/bin/makelatin1.sed'
ERRORFILE = LOCALBASE + "slerr.txt"
# NOTE(review): this command string is built but never executed in this file.
CORRECTUTF = 'sed -f' + ' ' + SEDFILE + ' ' + TEMPFILE + ' > ' + SETTINGSFILE

INSTNOTFOUND = 9999  # sort rank returned for an instrument missing from diInsts

# Column order of a dft record; every record must have exactly these fields.
DFT_COLUMNS = (
    'title', 'subtitle', 'composer', 'orig_comp', 'source', 'document',
    'volume', 'date', 'page', 'editor', 'encoder', 'arranger', 'intabulator',
    'contributor', 'concordances', 'piece', 'section', 'mType', 'mKey',
    'difficulty', 'ensemble', 'part', 'remarks', 'recording', 'facsimile',
    'fronimo', 'PDF', 'midi', 'created', 'modified',
)

# "f." / "p." / "ff." / "pp." followed by the first page of a possible list.
_PAGE_PREFIX_RE = re.compile(r'[fp]{1,2}\. ?([^,.][^,.]*)', re.DOTALL)
# Trailing page number with an optional letter suffix, like 49vb.
_NUMERIC_PAGE_RE = re.compile(r'([0-9][0-9]*)([v,a-n,x-z]*)$', re.DOTALL)

liDft = []      # global list of dft records
glLine = 0      # global indicating position in liDft
diInsts = {}    # global dictionary of instrument order
field = {}      # global dictionary of dft fields (header name -> column index)
flError = None  # error log; print(file=None) falls back to stdout


def get_dft_list():
    """Read DFTFILE into the global liDft as a list of field lists."""
    with open(DFTFILE, "r", encoding='ISO 8859-1') as flDft:
        for line in flDft:
            # Strip only the newline, so a final line without one keeps
            # its last character.
            line = line.rstrip('\n')
            if not line:
                continue  # a blank line carries no record
            liDft.append(line.split("\t"))


def get_fields(liFields):
    """Fill the global `field` with lower-cased, stripped header -> index."""
    for inx, fld in enumerate(liFields):
        field[fld.lower().strip()] = inx


def get_insts():
    """Fill the global diInsts with instrument name -> line number in INSTFILE.

    The file lists the instruments in their display order, so the line
    number serves as the sort rank.
    """
    with open(INSTFILE) as file:
        for inx, line in enumerate(file):
            diInsts[line.split('\t')[0].strip()] = inx


def get_numeric_page(stPage):
    """Return a page designation normalized for sorting.

    Spaces are removed. Pages starting with '#' or '%' keep that prefix and
    get their remainder zero-padded to 3 characters (a trailing letter is
    kept as a suffix). Pages introduced by "f." or "p." are reduced to the
    first page of a possible list; a numeric page is zero-padded to 3 digits
    with its suffix kept, while a signature such as "a4v" is returned
    unchanged. Anything that cannot be parsed yields '9999'.
    """
    stPage = stPage.replace(' ', '')
    if not stPage:
        return '9999'

    if stPage[0] in ('#', '%'):
        suffix = ''
        if stPage[-1].isalpha():
            suffix = stPage[-1]
            stPage = stPage[:-1]
        return stPage[0] + stPage[1:].zfill(3) + suffix

    # So it must begin with 'f.' or 'p.'
    val = _PAGE_PREFIX_RE.search(stPage)
    if not val:
        return '9999'

    # Only the first of a possible list of pages is used for sorting.
    stPage = val[1]
    if not stPage[0].isdigit():
        # It's a sig, like a4v, not a folio or page number.
        return stPage

    val = _NUMERIC_PAGE_RE.search(stPage)
    if not val:
        # Digit-leading but with an unrecognized suffix: use the same
        # sentinel as other unparsable pages instead of returning None.
        return '9999'
    return val[1].zfill(3) + val[2]


def add_zeros():
    """Normalize section and page fields of every record in liDft.

    A blank section becomes "0". A blank page is reported to the error log
    and left blank; any other page is replaced by get_numeric_page().
    """
    inxSection = field['section']
    inxPage = field['page']
    for line in liDft:
        # Assign untitled section a value of "0"
        if line[inxSection] == "":
            line[inxSection] = "0"
        if line[inxPage] == '':
            print("in addZeros; blank page #\n", line, file=flError)
        else:
            line[inxPage] = get_numeric_page(line[inxPage])


def _strip_page_zeros(stPage):
    """Return one page designation with its sort padding removed."""
    if stPage is None:
        return '9999'
    stPage = stPage.replace(' ', '')
    if stPage == '':
        return '9999'  # means an error

    if stPage[0] in ('#', '%'):
        rest = stPage[1:]
        trimmed = rest.lstrip('0')
        # Keep a single zero when the number itself was zero ("#000",
        # "#000a") rather than stripping it away entirely.
        if rest.startswith('0') and not trimmed[:1].isdigit():
            trimmed = '0' + trimmed
        return stPage[0] + trimmed

    if stPage[0].isdigit():
        # It's a numeric page number with a possible suffix, like 49vb
        val = _NUMERIC_PAGE_RE.search(stPage)
        if not val:
            return '9999'
        return str(int(val[1])) + val[2]

    # Else it's a sig, like a4v, and we leave it unchanged.
    return stPage


def remove_zeros():
    """Undo the zero padding of the page field in every record of liDft."""
    inxPage = field['page']
    for dftRec in liDft:
        dftRec[inxPage] = _strip_page_zeros(dftRec[inxPage])


def get_key():
    """Return [source, document, volume, page] of the current record.

    Source and document are cut at the first ':'; all four are stripped.
    """
    liRec = liDft[glLine]
    return [
        liRec[field['source']].split(':')[0].strip(),
        liRec[field['document']].split(':')[0].strip(),
        liRec[field['volume']].strip(),
        liRec[field['page']].strip(),
    ]


def get_type_name():
    """Return the type field of the current record, or "unknown" if empty."""
    return liDft[glLine][field['type']] or "unknown"


def get_section_name():
    """Return the section name of the current record.

    A section named '0' is taken to be the only section of its setting; it
    is renamed to '000' + page so that the name is unique to the setting.
    """
    secName = liDft[glLine][field['section']]
    if secName == '0':
        secName = '000' + liDft[glLine][field['page']]
    return secName


def inst_number(element):
    """Return the display rank of an instrument, or INSTNOTFOUND.

    For "label:instrument" the part after the first ':' is looked up.
    Spaces are removed and the name is lower-cased before the lookup.
    """
    if ':' in element:
        element = element.split(':')[1]
    element = element.replace(' ', '')
    return diInsts.get(element.lower(), INSTNOTFOUND)


def first_inst_number(element):
    """Return the display rank of the first instrument of a part dict."""
    inst = element['instruments'][0].lower()
    if ':' in inst:
        inst = inst.split(':')[1]  # If there is a ':', take 2nd element
    return inst_number(inst.strip())


def _unpack_record(liRec):
    """Return a dft record as a dict keyed by DFT_COLUMNS.

    Raises ValueError when the record does not have exactly one value per
    column.
    """
    if len(liRec) != len(DFT_COLUMNS):
        raise ValueError(
            'dft record has %d fields, expected %d: %r'
            % (len(liRec), len(DFT_COLUMNS), liRec))
    return dict(zip(DFT_COLUMNS, liRec))


def get_part(section):
    """Build the part dict for the current record and advance glLine.

    This is the only place where the position moves to the next dft record.
    `section` is accepted for interface compatibility and is not read.
    Raises ValueError if the record does not have one value per column.
    """
    global glLine
    rec = _unpack_record(liDft[glLine])

    # An empty part list means the part uses the whole ensemble.
    part = rec['part'] if rec['part'] != "" else rec['ensemble']
    liPartInsts = strip_list(part, ',').split(',')
    liEnsemble = rec['ensemble'].split(',')

    # Entries like "soprano 2" are unknown instruments; replace them with
    # the matching "soprano 2:soprano" entry of the ensemble list.
    for partInx, inst in enumerate(liPartInsts):
        inst = inst.strip()
        if inst_number(inst) != INSTNOTFOUND:
            liPartInsts[partInx] = inst
            continue
        for newInst in liEnsemble:
            if ':' in newInst and newInst.split(':')[0].strip() == inst:
                liPartInsts[partInx] = newInst.strip()
                break

    # sort instruments used in this part
    liPartInsts.sort(key=inst_number)

    diPart = {
        'instruments': liPartInsts,
        'fronimo': rec['fronimo'],
        'PDF': rec['PDF'],
        'midi': rec['midi'],
        'remarks': rec['remarks'],
        'ctime': rec['created'],
        'mtime': rec['modified'],
    }
    glLine += 1
    return diPart


def get_parts(section):
    """Collect the consecutive parts that belong to `section`.

    The list ends at the end of liDft, at a section name change, or at a
    change of key, so that an equally named section of the following
    setting is not merged into this one. Parts are sorted on their first
    instrument.
    """
    liParts = []
    lastSecName = section['secName']
    lastKey = get_key()
    while True:
        liParts.append(get_part(section))
        # Reached the end of the list
        if glLine >= len(liDft):
            break
        # Or found a new section / a new setting
        if get_section_name() != lastSecName or get_key() != lastKey:
            break
    liParts.sort(key=first_inst_number)
    return liParts


def get_section():
    """Build the section dict starting at the current record.

    The dict holds 'secName', 'type' (list of stripped type names) and
    'parts'. A generated name (one starting with '000') is blanked, and
    instrument entries like "soprano 1:soprano" are reduced to "soprano 1".
    """
    section = {'secName': get_section_name()}
    section['type'] = strip_list(get_type_name(), ",").split(',')
    liParts = get_parts(section)
    section['parts'] = liParts

    if section['secName'].startswith('000'):
        section['secName'] = ""

    for diPart in liParts:
        liInsts = diPart['instruments']
        liInsts[:] = [inst.split(':')[0] for inst in liInsts]
    return section


def get_sections(setting):
    """Collect the consecutive sections sharing the current key.

    At least one section is always read. `setting` is not read.
    """
    lastKey = get_key()
    liSections = []
    while True:
        liSections.append(get_section())
        # Reached the end of the list
        if glLine >= len(liDft):
            break
        # Or check for a key change
        if get_key() != lastKey:
            break
    return liSections


def get_setting(settingNum):
    """Build the setting dict that starts at the current record.

    Data common to all sections is taken from the first record; the
    sections are then read, which advances glLine past the setting.
    Raises ValueError if the record does not have one value per column.
    """
    rec = _unpack_record(liDft[glLine])

    # Include volume # in document name.
    document = rec['document']
    if rec['volume'] != "":
        document = document + ', v.' + rec['volume']

    # Modify key to one readable by WP
    mKey = rec['mKey']
    mKey = mKey.replace("b", "♭")
    mKey = mKey.replace("#", "♯")
    mKey = mKey.replace("m", " minor")
    mKey = mKey.replace("M", " major")

    # Modify title, subtitle: '|' becomes a backslash followed by a newline
    title = rec['title'].replace('|', '\\\n')
    subtitle = rec['subtitle'].replace('|', '\\\n')

    liEnsemble = strip_list(rec['ensemble'], ',').split(",")
    liEnsemble.sort(key=inst_number)

    setting = {
        'setting': settingNum,
        'title': title,
        'subtitle': subtitle,
        'composer': rec['composer'],
        'orig_comp': rec['orig_comp'],
        'source': rec['source'],
        'document': document,
        'volume': rec['volume'],
        'date': rec['date'],
        'page': rec['page'],
        'editor': rec['editor'],
        'encoder': rec['encoder'],
        'arranger': rec['arranger'],
        'intabulator': rec['intabulator'],
        'contributor': rec['contributor'],
        'concordances': rec['concordances'],
        'difficulty': rec['difficulty'],
        'mKey': mKey,
        'recording': rec['recording'],
        'facsimile': rec['facsimile'],
        'liEnsemble': liEnsemble,
    }
    # There should always be at least one section
    setting['Sections'] = get_sections(setting)
    return setting


def get_settings():
    """Return the list of all settings in liDft, starting from record 0.

    Setting numbers start at 0 and restart at 0 whenever document + volume
    changes. An empty liDft yields an empty list.
    """
    global glLine
    liSettings = []
    glLine = 0
    settingNum = 0
    lastDoc = None
    inxDoc = field['document']
    inxVol = field['volume']
    # glLine is advanced by get_part in get_section in get_setting
    while glLine < len(liDft):
        currDoc = liDft[glLine][inxDoc] + liDft[glLine][inxVol]
        if lastDoc is not None:
            settingNum = settingNum + 1 if currDoc == lastDoc else 0
        lastDoc = currDoc
        # Append right away so the final setting is not lost.
        liSettings.append(get_setting(settingNum))
    return liSettings


def strip_list(stIn, separator):
    """Return stIn with white space removed around each separated item."""
    return separator.join(item.strip() for item in stIn.split(separator))


def strip_source_and_document(n):
    """Strip white space around the ':'-separated items of record n.

    Cleans the source (index 4) and document (index 5) fields in place and
    returns the record.
    """
    lsRec = liDft[n]
    lsRec[4] = strip_list(lsRec[4], ':')
    lsRec[5] = strip_list(lsRec[5], ':')
    return lsRec


def main():
    """Read the dft and instrument files and print the settings as JSON."""
    global flError
    # NOTE(review): TEMPFILE is created/truncated here but nothing in this
    # file writes to it; the JSON goes to stdout. Confirm the intent.
    with open(TEMPFILE, "w", encoding='ISO 8859-1'), \
            open(ERRORFILE, "w", encoding='ISO 8859-1') as errFile:
        flError = errFile
        try:
            # Reads all dft data into global liDft
            get_dft_list()
            # Delete and save header line
            liHeaders = liDft.pop(0)
            # get dictionary of fields
            get_fields(liHeaders)
            # get dictionary of instruments
            get_insts()
            # normalize numeric page designations to 3 digits for sorting
            add_zeros()
            # get rid of blanks around source and document items
            for n in range(len(liDft)):
                liDft[n] = strip_source_and_document(n)
            # sort by source, document, volume, page, section
            liDft.sort(key=itemgetter(
                field['source'],
                field['document'],
                field['volume'],
                field['page'],
                field['section'],
            ))
            # Remove leading zeros of page #s
            remove_zeros()
            # Get list of settings
            liSettings = get_settings()
        finally:
            flError = None

    # Write list of settings in json format
    stSettings = json.dumps(liSettings, indent=4)
    print(stSettings)


if __name__ == "__main__":
    main()