import json, re, os
from operator import itemgetter
import platform
import subprocess

# Choose the base directory and the scratch file according to the host OS:
# drive-letter paths when platform.platform() starts with 'Windows',
# /mnt/... paths otherwise.
# NOTE(review): the two TEMPFILE names differ ('slTemp' with a letter l vs.
# 's1Temp' with a digit 1) -- confirm which spelling is intended.
currOS = platform.platform()
if currOS.startswith('Windows'):
    LOCALBASE = 'D:/website/'
    TEMPFILE = 'C:/TEMP/programs/slTemp.json'
else:
    LOCALBASE = '/mnt/d/website/'
    TEMPFILE = '/mnt/c/TEMP/programs/s1Temp.json'

# Data files located directly under LOCALBASE
INSTFILE = f"{LOCALBASE}insts.tsv"
DFTFILE = f"{LOCALBASE}dft.tsv"
SETTINGSFILE = f"{LOCALBASE}settings.json"
SEDFILE = '~/bin/makelatin1.sed'
ERRORFILE = f"{LOCALBASE}slerr.txt"
# Shell command line that runs the sed script over TEMPFILE into SETTINGSFILE
CORRECTUTF = f'sed -f {SEDFILE} {TEMPFILE} > {SETTINGSFILE}'
# Sort rank returned for an instrument that is not in diInsts
INSTNOTFOUND = 9999

# Shared state used by the functions below
liDft = []    # every dft record, each a list of field strings
glLine = 0    # index in liDft of the record currently being processed
diInsts = {}  # instrument name -> position in the instrument order
field = {}    # dft column name -> column index

def get_dft_list():
    """Read DFTFILE into the global list liDft.

    Every line of the tab-separated file is split on tabs and appended to
    liDft as a list of strings, in file order.  Nothing is returned; the
    result is the updated global.
    """
    global liDft
    # The context manager closes the file even when reading fails part-way.
    with open(DFTFILE, "r", encoding='ISO 8859-1') as flDft:
        for line in flDft:
            # Remove only the line terminator, so a final line that lacks
            # one does not lose its last character.
            liDft.append(line.rstrip('\n').split("\t"))
# Returns global liDft

def get_fields(liFields):
    """Store the column index of every header name in the global `field`.

    liFields -- list of header strings.  Each name is lower-cased and
                stripped of surrounding whitespace before it is used as a
                key; the value is its position in liFields (from 0).
    """
    global field
    for position, header in enumerate(liFields):
        field[header.lower().strip()] = position
# end get_fields

# Load the instrument order (INSTFILE lists the instruments in display order)
def get_insts():
    """Fill the global diInsts with {instrument name: line number}.

    The name is the first tab-separated column of each INSTFILE line with
    surrounding whitespace removed; line numbers count from 0.
    """
    with open(INSTFILE) as flInsts:
        for rank, record in enumerate(flInsts):
            name = record.split('\t')[0].strip()
            diInsts[name] = rank
# returns global diInsts

# Turn a page designation into a key that sorts in proper order
def get_numeric_page(stPage):
    """Return a sortable key for the page designation stPage.

    All spaces are removed first.  Then:
      * '#n' / '%n' (optionally ending in one letter): the number is
        zero-padded to 3 digits, e.g. '#5' -> '#005', '%12a' -> '%012a'.
      * 'f.', 'ff.', 'p.', 'pp.' followed by a number and an optional
        suffix: the prefix is dropped and the number zero-padded,
        e.g. 'f. 49v' -> '049v'.  Only the text up to the first ',' or '.'
        after the prefix is considered.
      * the same prefixes followed by a signature such as 'a4v': the
        signature is returned unchanged.
      * anything else, including an empty string or a numeric page whose
        suffix is not recognised: the error key '9999'.

    A string is always returned, so callers can sort on the result.
    """
    stPage = stPage.replace(' ', '')
    # Nothing left to index once the spaces are gone
    if not stPage:
        return '9999'
    if stPage[0] in ('#', '%'):
        suffix = ''
        if stPage[-1].isalpha():
            suffix = stPage[-1]
            stPage = stPage[:-1]
        return stPage[0] + stPage[1:].zfill(3) + suffix
    # So it must begin with 'f.' or 'p.'
    val = re.search(r'[fp]{1,2}\. ?([^,.][^,.]*)', stPage, re.DOTALL)
    if not val:
        return '9999'
    # Only the first entry of a comma-separated list is used for sorting
    stPage = val[1]
    if not stPage[0].isdigit():
        # It's a sig, like a4v, not a folio or page number
        return stPage
    # A numeric page number with a possible suffix, like 49vb.  The pattern
    # is anchored at the end of the string, so it picks up the trailing
    # digits + suffix.
    val = re.search('([0-9][0-9]*)([v,a-n,x-z]*)$', stPage, re.DOTALL)
    if not val:
        # Unrecognised suffix: return the error key rather than None
        return '9999'
    return val[1].zfill(3) + val[2]

def add_zeros():
    """Prepare the section and page columns of every liDft record for sorting.

    For each record:
      * an empty section (column 16) becomes "0";
      * the page (column 8) is replaced by get_numeric_page(page).

    A blank page, or a page for which get_numeric_page yields no key, is
    reported to the error file flError and replaced by the error key '9999'.
    Reports go to flError rather than stdout, and the function never waits
    for keyboard input.

    Relies on the globals liDft and flError (flError must already be open).
    """
    global liDft
    global flError
    inxPage = 8      # column holding the page designation
    inxSection = 16  # column holding the section name
    for line in liDft:
        # Assign untitled section a value of "0"
        if line[inxSection] == "":
            line[inxSection] = "0"
        if line[inxPage] == '':
            print("in addZeros; blank page #\n", line, file=flError)
            # Use the error key instead of reusing the previous record's page
            stPage = '9999'
        else:
            stPage = get_numeric_page(line[inxPage])
            # This should not happen
            if not stPage:
                print('af get_numeric_page; no page key; page = ',
                      line[inxPage], file=flError)
                print(line, file=flError)
                stPage = '9999'
        line[inxPage] = stPage
# end add_zeros

def remove_zeros():
    """Strip the sort padding from the page column of every liDft record.

    The page column is located through field['page'].  For each record:
      * spaces are removed;
      * a missing or empty page becomes the error key '9999';
      * '#nnn' / '%nnn': leading zeros after the prefix are dropped
        ('#005' -> '#5'; an all-zero number becomes '0', '%000' -> '%0');
      * a page starting with a digit: leading zeros of the number are
        dropped and the suffix kept ('049v' -> '49v', '000c' -> '0c');
        if the number/suffix pattern does not match, the page becomes '9999';
      * anything else (a signature such as 'a4v') is left unchanged.
    """
    global liDft
    global field
    inxPage = field['page']
    for dftRec in liDft:
        stPage = dftRec[inxPage]
        # stPage should not be undefined or = ""; both mean an error
        if stPage is None:
            stPage = ''
        stPage = stPage.replace(' ', '')
        if stPage == '':
            stPage = '9999'
        elif stPage[0] in ('%', '#'):
            # lstrip cannot run off the end of an all-zero number
            stPage = stPage[0] + (stPage[1:].lstrip('0') or '0')
        elif stPage[0].isdigit():
            # It's a numeric page number with a possible suffix, like 49vb
            val = re.search('([0-9][0-9]*)([v,a-n,x-z]*)$', stPage, re.DOTALL)
            if not val:
                stPage = '9999'
            else:
                # Remove leading zeros, keeping a single '0' for e.g. '000c'
                stPage = (val[1].lstrip('0') or '0') + val[2]
        # Else it's a sig, like a4v, not a folio or page number,
        # and we leave it unchanged.
        dftRec[inxPage] = stPage
#end remove_zeros

def get_key():
    """Return the key of the current record (liDft[glLine]).

    The key is the list [source, document, volume, page].  For source and
    document only the text before the first ':' is used; every element is
    stripped of surrounding whitespace.
    """
    global liDft
    global glLine
    liRec = liDft[glLine]
    source = liRec[field['source']].split(':')[0]
    document = liRec[field['document']].split(':')[0]
    volume = liRec[field['volume']]
    page = liRec[field['page']]
    return [item.strip() for item in (source, document, volume, page)]

def get_type_name():
    """Return the 'type' field of the current record (liDft[glLine]).

    A missing or empty type is reported as "unknown".
    """
    global liDft
    global glLine
    stType = liDft[glLine][field['type']]
    if stType is None or stType == '':
        return "unknown"
    return stType

def get_section_name():
    """Return the section name of the current record (liDft[glLine]).

    A section name of '0' marks an untitled section; it is replaced by
    '000' followed by the record's page so that the name is specific to
    that page.
    """
    global liDft
    global glLine
    record = liDft[glLine]
    name = record[field['section']]
    if name != '0':
        return name
    return '000' + record[field['page']]

def inst_number(element):
    """Return the display rank of an instrument, or INSTNOTFOUND.

    element -- an instrument name, optionally written 'label:instrument';
               when a ':' is present the text after the first ':' is used.
    All spaces are removed and the name is lower-cased before it is looked
    up in diInsts.
    NOTE(review): diInsts keys are stored as read from the instrument file
    (only stripped), so names containing spaces or capitals there would
    never match -- confirm against the file contents.
    """
    global diInsts
    name = element.split(':')[1] if ':' in element else element
    name = name.replace(' ', '').lower()
    return diInsts.get(name, INSTNOTFOUND)

def first_inst_number(element):
    """Sort key for a part: the rank of the first instrument it lists.

    element -- a part dictionary with an 'instruments' list.  The first
               entry is lower-cased; if it contains ':' the text after the
               first ':' is taken, stripped, and passed to inst_number.
    """
    leading = element['instruments'][0].lower()
    if ':' in leading:
        # If there is a ':', take 2nd element
        leading = leading.split(':')[1]
    return inst_number(leading.strip())

def get_part(section):
    """Build the part dictionary for the current record and advance glLine.

    section -- accepted for the caller's convenience; not used here.

    The record liDft[glLine] must have exactly 30 fields (the positional
    unpack below raises ValueError otherwise).  A blank part column means
    the part uses the whole ensemble.  Instruments of the part that are
    not found by inst_number are looked up among the 'label:instrument'
    entries of the ensemble and, when the label matches, replaced by that
    full entry so they can be ranked.  The instrument list is then sorted
    with inst_number.

    Returns a dict with keys 'instruments', 'fronimo', 'PDF', 'midi',
    'remarks', 'ctime' and 'mtime'.
    """
    global liDft
    global glLine
    (title, subtitle, composer, orig_comp, source, document, volume, date,
     page, editor, encoder, arranger, intabulator, contributor, concordances,
     piece, recSection, mType, mKey, difficulty, ensemble, part, remarks,
     recording, facsimile, fronimo, PDF, midi, created,
     modified) = liDft[glLine]
    # Add data specific to this part
    if part == "":
        part = ensemble
    liPartInsts = strip_list(part, ',').split(',')
    liEnsemble = ensemble.split(',')
    # Checking for entries like "soprano 2" and adding ":soprano" to them.
    for partInx, inst in enumerate(liPartInsts):
        inst = inst.strip()
        if inst_number(inst) != INSTNOTFOUND:
            liPartInsts[partInx] = inst
            continue
        for newInst in liEnsemble:
            if ':' not in newInst:
                continue
            # Compare with the label in front of the ':' in the ensemble list
            if newInst.split(':')[0].strip() == inst:
                # change it to = entry in ensemble list
                liPartInsts[partInx] = newInst.strip()
                break
    # sort instruments used in this part here
    liPartInsts.sort(key=inst_number)
    # Create part, with data
    diPart = {
        'instruments': liPartInsts,
        'fronimo': fronimo,
        'PDF': PDF,
        'midi': midi,
        'remarks': remarks,
        'ctime': created,
        'mtime': modified,
    }
    # this is the only place where we go to the next dft record
    glLine += 1
    return diPart
#getPart

# List of parts ends at section name change or at a change of key
def get_parts(section):
    """Collect the consecutive parts that belong to one section.

    section -- dict whose 'secName' entry is the name of the section being
               read; it is passed on to get_part.

    Starting at liDft[glLine], parts are read with get_part (which advances
    glLine) until the end of liDft, until the section name changes, or
    until the record key (see get_key) changes.  The key check keeps records
    of the next setting out of this section when both settings use the same
    section name.

    Returns the list of part dicts, sorted on the rank of each part's
    first instrument.
    """
    global glLine
    liParts = []
    lastSecName = section['secName']
    lastKey = get_key()
    while True:
        liParts.append(get_part(section))
        # Reached the end of the list
        if glLine >= len(liDft):
            break
        # Or found a new section, or the first record of another setting
        if get_section_name() != lastSecName or get_key() != lastKey:
            break
    # Sort parts here on 1st instrument of each part
    liParts.sort(key=first_inst_number)
    return liParts
#end get_parts
    
def get_section():
    """Build the section that starts at the current record.

    Returns a dict with:
      'secName' -- the section name, or "" when the name starts with '000'
                   (the marker get_section_name gives untitled sections);
      'type'    -- the record's type split on ',' with whitespace removed;
      'parts'   -- the list returned by get_parts, with every instrument
                   entry reduced to the text before its first ':'.
    """
    global glLine
    secName = get_section_name()
    stType = get_type_name()
    section = {'secName': secName}
    section['type'] = strip_list(stType, ",").split(',')
    liParts = get_parts(section)
    section['parts'] = liParts
    if section['secName'].startswith('000'):
        section['secName'] = ""
    # Change e.g. soprano 1:soprano to soprano 1
    for diPart in liParts:
        liInsts = diPart['instruments']
        # Update the list in place; entries without ':' are unchanged
        liInsts[:] = [inst.split(':')[0] for inst in liInsts]
    return section

# List of sections ends at change of key
def get_sections(setting):
    """Collect the consecutive sections that share the current record's key.

    setting -- accepted for the caller's convenience; not used here.

    At least one section is always read.  Reading stops at the end of
    liDft or as soon as get_key() differs from the key of the first record.
    """
    firstKey = get_key()
    liSections = [get_section()]
    while glLine < len(liDft) and get_key() == firstKey:
        liSections.append(get_section())
    return liSections
#end get_sections

# Get next setting
def get_setting(settingNum):
    """Build the setting that starts at the current record (liDft[glLine]).

    settingNum -- number stored under the 'setting' key.

    The record must have exactly 30 fields (the positional unpack raises
    ValueError otherwise).  Before the data is stored:
      * a non-empty volume is appended to the document name as ', v.<volume>';
      * in the key, 'b' and '#' become the flat and sharp signs, 'm' becomes
        ' minor' and 'M' becomes ' major';
      * every '|' in title and subtitle becomes a backslash followed by a
        newline;
      * the ensemble is split on ',' and sorted with inst_number.

    Returns the setting dict; its 'Sections' entry is filled by
    get_sections, which advances glLine past the setting's records.
    """
    global liDft
    global glLine
    (title, subtitle, composer, orig_comp, source, document, volume, date,
     page, editor, encoder, arranger, intabulator, contributor, concordances,
     piece, section, mType, mKey, difficulty, ensemble, part, remarks,
     recording, facsimile, fronimo, PDF, midi, created,
     modified) = liDft[glLine]
    # NEW: include volume # in document name.
    if volume != "":
        document = document + ', v.' + volume
    # NEW: modify key to one readable by WP (replacement order matters)
    for old, new in (("b", "♭"), ("#", "♯"), ("m", " minor"), ("M", " major")):
        mKey = mKey.replace(old, new)
    # NEW Modify title, subtitle to insert appropriate newline
    title = title.replace('|', '\\\n')
    subtitle = subtitle.replace('|', '\\\n')
    liEnsemble = strip_list(ensemble, ',').split(",")
    liEnsemble.sort(key=inst_number)
    # Initialize setting by loading data common to all sections
    setting = {
        'setting': settingNum,
        'title': title,
        'subtitle': subtitle,
        'composer': composer,
        'orig_comp': orig_comp,
        'source': source,
        'document': document,
        'volume': volume,
        'date': date,
        'page': page,
        'editor': editor,
        'encoder': encoder,
        'arranger': arranger,
        'intabulator': intabulator,
        'contributor': contributor,
        'concordances': concordances,
        'difficulty': difficulty,
        'mKey': mKey,
        'recording': recording,
        'facsimile': facsimile,
        'liEnsemble': liEnsemble,
    }
    # There should always be at least one section
    setting['Sections'] = get_sections(setting)
    return setting

# termination condition = end of liDft list
def get_settings():
    """Convert all of liDft into a list of settings.

    glLine is reset to 0 and get_setting is called repeatedly until every
    record has been consumed (glLine is advanced by get_part, reached via
    get_setting).  Every setting built is appended, including the last one.
    Settings are numbered from 0 within each document/volume (columns 5
    and 6 of the setting's first record); the number restarts at 0 when
    that pair changes.

    Returns the list of setting dicts; an empty liDft yields [].
    """
    global liDft
    global glLine
    liSettings = []
    glLine = 0
    settingNum = 0  # First setting number in document is 0
    lastDoc = None
    while glLine < len(liDft):
        # Document + volume, kept as a pair so that e.g. ('Doc1', '2') and
        # ('Doc', '12') are not mistaken for the same document
        currDoc = (liDft[glLine][5], liDft[glLine][6])
        if lastDoc is not None:
            # reset setting number at start of new document/volume
            settingNum = settingNum + 1 if currDoc == lastDoc else 0
        lastDoc = currDoc
        liSettings.append(get_setting(settingNum))
    return liSettings

# get rid of white space in comma or colon-separated strings
def strip_list(stIn, separator):
    """Return stIn with whitespace removed around every separated item.

    stIn      -- string of items joined by `separator`
    separator -- the delimiter, e.g. ',' or ':'

    The items are re-joined with the same separator; empty items are kept.
    """
    return separator.join(item.strip() for item in stIn.strip().split(separator))

def strip_source_and_document(n):
    """Remove whitespace around the ':'-separated items of record n.

    Applies strip_list(..., ':') to column 4 (source) and column 5
    (document) of liDft[n], updating the record in place, and returns
    that record.
    """
    lsRec = liDft[n]
    for column in (4, 5):
        lsRec[column] = strip_list(lsRec[column], ':')
    return lsRec

def main():
    """Read the dft table, sort it and print the settings as JSON on stdout.

    Steps: load the records, take the header line as the column index,
    load the instrument order, pad the page keys, clean the source and
    document columns, sort by source, document, volume, page and section,
    remove the padding again, build the settings and print them with
    json.dumps(indent=4).

    Both files opened here are closed when the function ends, including
    when a step raises.
    NOTE(review): TEMPFILE is opened for writing (and so emptied) but
    nothing is written to it, and the CORRECTUTF command is not run here --
    confirm whether the JSON was meant to go through that file.
    """
    global liDft
    global flError
    with open(TEMPFILE, "w", encoding='ISO 8859-1') as flTemp, \
            open(ERRORFILE, "w", encoding='ISO 8859-1') as flError:
        # Reads all dft data into global liDft
        get_dft_list()
        # Delete and save header line
        liHeaders = liDft.pop(0)
        # get dictionary of fields
        get_fields(liHeaders)
        # get dictionary of instruments (order data in global diInsts)
        get_insts()
        # normalize numeric page designations to 3 digits for sorting
        add_zeros()
        # get rid of leading blanks
        for n in range(len(liDft)):
            liDft[n] = strip_source_and_document(n)
        # sort by source,document,volume,page,section
        liDft.sort(key=itemgetter(
                                  field['source'],
                                  field['document'],
                                  field['volume'],
                                  field['page'],
                                  field['section']
                                 )
                  )
        # Remove leading zeros of page #s
        remove_zeros()
        # Get list of settings
        liSettings = get_settings()
        # Write list of settings in json format
        stSettings = json.dumps(liSettings, indent=4)
        print(stSettings)
# end main
    
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ ==  "__main__":
    main()