#!/usr/bin/python3.4

from pprint import pprint, pformat
from difflib import SequenceMatcher
import os
import csv
import re
import sys
#import distance
import collections

from unicode import latin1_to_ascii
from strings import IncrementString

# Stand-in for "no limit"; used below as the default for depth and
# max_width arguments of the dump helpers.
MAXINT = 2**31 # KLUDGE

# Directory holding the data files.
# NOTE(review): not referenced in the visible part of the file -- confirm usage.
DIR_DATA = "." # KLUDGE

"""
Definitions:

    A Piece() is an abstract concept that encompasses all concrete
        realizations (Settings()) of a particular piece of
        music. E.g. Greensleeves is an original tune that has been
        arranged in many different ways and in many different keys. These
        individual arrangements are called Settings() of the Piece()
        called Greensleeves.

    A Part() represents a single .ft3 file and contains information
        extracted from it by Sarge's dft.pl program. It may either be
        for a single instrument or voice, or for several, or even be
        the entire score of a Setting(), but it is always derived from
        a single file.

    A Section() is composed of one or more Parts() for an identified
        sub-portion of a Setting().  A Section() is analogous to a
        movement in a symphony or other large work. Parts() are
        identified as being associated with a Section() using meta
        data in the file.

    A Setting() is composed of either (but not both):
        1. one or more Parts(), or 2. one or more Sections().
        If there are Sections *and* Parts, then this is an
        editorial error that is flagged for correction.

The dft.tsv file represents all information extracted (by dft.pl) from
all .ft3 files in the collection. A certain amount of normalization has
already been accomplished by dft.pl.

The routine Setting.create_from_csv() consumes the dft.tsv file and generates a Part()
for each row.  A Part's signature is composed of the tuple (Source,
Document, Page, Date) and is used to group Parts() into Settings(). I.e
all Parts() with the same signature are *by definition* in the same
Setting(). Once all Parts() have been created and grouped, each Setting()
has its setting.analyze_sections() routine invoked to discover if there
are Sections() that can be inferred from information common amongst
various Parts() in the Setting().

Eventually there will be the assignment of Settings() to Pieces(),
but the exact nature of discovering Setting() >> Piece() is still in flux.
More than likely it will be accomplished via one or more META files.

Invariants:
    1. All parts in a setting have the same title
    2. All part instrument lists must be in same order as ensemble field

In the file name:
    _P = Performance
    _O = Ornamented

    score is in part.part, should be sole value and will not show up in ensemble

Composer detail
    need exception sort for some composers
    type
        title
Instruments
    14 course and also handle smaller number of courses
        if lute pitch is specified, smaller courses must have same pitch
        Unspecified pitch == G for lutes and archlutes
        Unspecified pitch == A for theorbo

"""

class bcolors:
    """ANSI terminal escape sequences used to colour or style printed text.

    Emit one of the style codes before the text and ENDC after it to reset
    the terminal back to its default rendition.
    """
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'       # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


def gen_csv(csv_file, delimiter="\t", use_dict=False, clean_empty_end=False):
    """Yield the rows of a delimited text file one at a time.

    The file is decoded as ISO-8859-1.

    Args:
        csv_file: path of the file to read.
        delimiter: field separator handed to the csv module.
        use_dict: when true, rows are produced by csv.DictReader (mappings
            keyed by the header row); otherwise they are plain lists
            produced by csv.reader.
        clean_empty_end: when true, trailing empty-string fields are removed
            from every list row.  Rows that are empty or consist only of
            empty fields become ``[]``.  Mapping rows (use_dict=True) have
            no notion of "trailing" fields and are passed through untouched.

    Yields:
        One row per record, as a list or a mapping depending on use_dict.
    """
    with open(csv_file, encoding="ISO-8859-1") as csvfile:
        if use_dict:
            reader = csv.DictReader(csvfile, delimiter=delimiter)
        else:
            reader = csv.reader(csvfile, delimiter=delimiter)

        for line in reader:
            # eat empty trailing fields; the emptiness test on the list
            # itself keeps blank / all-empty rows from popping an empty list
            if clean_empty_end and isinstance(line, list):
                while line and line[-1] == '':
                    line.pop()

            yield line
    # with
# gen_csv

def ForceUtf8(strIn):
    """Return strIn encoded as UTF-8 bytes.

    Args:
        strIn: either text (str), which is encoded directly, or raw bytes,
            which are taken to be Latin-1 and transcoded to UTF-8.

    Returns:
        bytes holding the UTF-8 encoding of the text.

    Raises:
        AttributeError: if strIn is neither text nor bytes (it has no
            ``encode`` method).
    """
    strOut = strIn
    # Only byte strings need decoding; testing the type explicitly replaces
    # the former bare ``except: pass`` that hid every kind of failure.
    if isinstance(strOut, (bytes, bytearray)):
        strOut = strOut.decode('latin1')

    return strOut.encode(encoding="UTF-8")
# ForceUtf8


# Patterns used by Instrument.get_instruments() / Instrument.clean_instrument().
# reSep: a '.' or ',' followed by optional whitespace; separates instrument names.
reSep = re.compile(r'[.,]\s*')
# reSpaces: a run of two or more whitespace characters.
reSpaces = re.compile(r"\s\s+")
# reNum: a trailing number together with any whitespace in front of it.
reNum = re.compile(r"\s*\d+$")
# reOther: a "| duet", "| ground" or "| treble" qualifier.
reOther = re.compile(r"\s*\|\s*(duet|ground|treble)")

"""
def GetInstruments():
    # Prepopulate the Instruments dictionary.
    for i in Instrument.objects.all():
        mpNameInst[i.name] = i
# GetInstruments
GetInstruments()

"""

def str_homogenize(str_in):
    """Return a canonical form of str_in suitable for loose comparison.

    The text is lower-cased, hyphens become spaces, every run of whitespace
    is collapsed to a single space, and the result is passed through
    latin1_to_ascii().
    """
    folded = re.sub(r'-', ' ', str_in.lower())  # hyphens count as spaces
    return latin1_to_ascii(re.sub(r'\s+', ' ', folded))
# str_homogenize

def str_similarity(a, b, typo_min):
    """Classify how two strings differ, or report how similar they are.

    When the difflib similarity ratio of a and b is above typo_min, the
    strings are run through a series of cumulative normalisations; the label
    of the first normalisation after which they compare equal is returned.
    If none makes them equal but their original lengths differ, "LENGTH "
    is returned.  In every other case the ratio itself (a float) is returned.
    """
    ratio = SequenceMatcher(None, a, b).ratio()
    if not (ratio > typo_min):
        return ratio

    # might be a typo: apply the normalisations in order, each one working
    # on the output of the previous one
    hyph = r'-+\s+'
    punct = r'["\'<>.,;:!?$%#@()&^\[\]{}-]+'
    steps = (
        ("CAPS   ", lambda s: s.lower()),
        ("LEAD SPC", lambda s: re.sub(r'^\s+', '', s)),
        ("INT SPC", lambda s: re.sub(r'\s+', ' ', s)),
        ("HYPHEN ", lambda s: re.sub(hyph, r'', s)),
        ("PUNCT  ", lambda s: re.sub(punct, r'', s)),
        ("ACCENTS", lambda s: latin1_to_ascii(s)),
        ("LEAD NM", lambda s: re.sub(r'^\s*([0-9]+[a-z]?\.?\s+)?', r'', s)),
        ("INT NUM", lambda s: re.sub(r'([0-9]+[a-z]?\.?\s+)', r'', s)),
    )

    left, right = a, b
    for label, normalise in steps:
        left = normalise(left)
        right = normalise(right)
        if left == right:
            return label

    if len(a) != len(b):
        return "LENGTH "

    return ratio
# str_similarity

def Indent(level=0):
    """Return the whitespace prefix for nesting depth level (4 spaces each)."""
    one_level = "    "
    return level * one_level

def EnsembleEncode(ensemble):
    """Placeholder: split the comma-separated ensemble and return None.

    The split result is not used; no encoding is produced yet.
    """
    ensemble.split(",")  # result intentionally discarded (stub)

def CmpEnsemble(a, b):
    """Placeholder comparator: ignores b and returns a unchanged.

    NOTE(review): the name suggests an ensemble ordering function, but no
    comparison is implemented here.
    """
    return a


def is_iterable(obj):
    """Return True when iter(obj) succeeds, False when it raises TypeError."""
    try:
        iter(obj)
    except TypeError:
        return False
    return True
# is_iterable

class LuteMixIn(object):
    """Shared behaviour for the in-memory "model" classes in this file.

    Provides a tiny per-class registry (get / add / get_or_create / all),
    an error list for editorial reporting, and a generic field dumper.

    The registry helpers read and write a class-level ``_OBJECTS`` dict
    which each subclass that uses them has to define itself.
    """

    def __init__(self, *args, **kwargs):
        self._errors = [] # to accumulate discovered errors for editorial views

    def save(self):
        """Persistence hook; does nothing in this in-memory implementation."""
        pass

    @classmethod
    def get(kls, key, fail=True):
        """Return the registered object for key.

        With fail true (the default) a missing key raises KeyError;
        with fail false a missing key yields None.
        """
        if key in kls._OBJECTS or fail:
            return kls._OBJECTS[key] # will fail if no such key
        return None

    @classmethod
    def add(kls, key, obj):
        """Register obj under key, replacing any previous entry."""
        # what about overwrites?
        kls._OBJECTS[key] = obj

    @classmethod
    def get_or_create(kls, key, *args, **kwargs):
        """Return ``(obj, created)`` for key, constructing obj if necessary.

        A new object is built as ``kls(key, *args, **kwargs)`` after the
        control keywords have been removed from kwargs:

            save:   when true the new object is stored in the registry;
                    when false (default) it is returned but NOT registered.
            commit: reserved for a future transaction commit; currently it
                    has no effect.  (It used to be assigned to ``save`` by
                    mistake.)
        """
        obj = kls.get(key, fail=False)
        if obj is not None:
            return (obj, False)

        save = kwargs.pop('save', False)
        commit = kwargs.pop('commit', False)
        obj = kls(key, *args, **kwargs)
        if save:
            kls.add(key, obj)
            if commit:
                #transaction.commit()
                pass
        return (obj, True)

    @classmethod
    def all(kls, sorted=False):
        """Return every registered object.

        With sorted false the registry dict itself is returned (not a copy);
        with sorted true a list of ``(key, obj)`` pairs ordered by key.
        """
        if sorted:
            ret = list(kls._OBJECTS.items())
            ret.sort(key=lambda a: a[0])
            return ret
        return kls._OBJECTS

    def map_fields(self, fieldsIn):
        """Not implemented yet."""
        pass

    def dump_fields(self, indent=0, depth=MAXINT,
                keys='*', ignore_empty=False, strip_newlines=True,
                join_lines="\n", join_fields=None,
                max_width=MAXINT,
                field_fmt="%-15s: %s", error_fmt="%-15s: %s", #error_fmt=bcolors.FAIL+"%-15s: %s"+bcolors.ENDC,
                 errors_only=False):
        """Format selected instance attributes as text.

        Args:
            indent: nesting level; each output line is prefixed by Indent(indent).
            depth: accepted for symmetry with the dump() methods; unused here.
            keys: attribute names to show.  A single name may be given as a
                string.  Names may be abbreviations found in the instance's
                ``abbrev_map``.  '*' (or nothing) selects every field: the
                values of ``field_name_map`` when the instance has one,
                otherwise all instance attributes in sorted order.  The
                pseudo-key 'errors' requests an "ERRORS" line in front of
                the fields.  The caller's sequence is never modified.
            ignore_empty: skip fields whose value is None or empty instead
                of printing a blank.
            strip_newlines: replace line breaks inside a value, together
                with the whitespace around them, by one space.
            join_lines: separator between fields in multi-line output.
            join_fields: when set, all fields are joined with it into ONE
                line (indented once) instead of one line per field.
            max_width: lines longer than this are cut and end in '...'.
            field_fmt: %-format taking (label, value).
            error_fmt: %-format taking ("ERRORS", text); when empty, unknown
                key names are not reported.
            errors_only: return None when the object has no recorded errors.

        Returns:
            The formatted text, or None (see errors_only).

        Unknown key names are reported in the ERRORS line of this dump only;
        they are not added to the object's permanent error list.
        """
        # Work on a copy so that dumping never changes the object's state.
        errors = list(getattr(self, "_errors", []))
        if errors_only and not errors:
            return None

        fields = []
        mpKeyValue = vars(self)
        vkeys = sorted(mpKeyValue.keys())

        abbrev_map = getattr(self, 'abbrev_map', None)

        # Normalise keys to a private list.
        if isinstance(keys, str):
            keys = [keys]
        else:
            keys = list(keys) if keys else []

        print_errors = 'errors' in keys
        if print_errors:
            keys = [k for k in keys if k != 'errors']

        if not keys or '*' in keys:
            if hasattr(self, 'field_name_map'):
                keys = list(self.field_name_map.values())
            else:
                keys = vkeys

        field_name_reverse_map = getattr(self, 'field_name_reverse_map', None)

        for k in keys:
            if abbrev_map and k not in mpKeyValue and k in abbrev_map:
                k = abbrev_map[k]

            if k not in mpKeyValue:
                if error_fmt:
                    errors.append("Bad field name: %s" % k)
                continue

            val = mpKeyValue[k]
            is_empty = val is None or (
                is_iterable(val) and hasattr(val, '__len__') and len(val) == 0)
            if is_empty:
                if ignore_empty:
                    continue
                val = " "
            val = str(val)
            if strip_newlines and ("\n" in val):
                val = re.sub(r'\s*\n\s*', ' ', val)

            # Show the human readable label when one exists; attributes
            # without a label (e.g. computed ones) keep their own name.
            label = k
            if field_name_reverse_map:
                label = field_name_reverse_map.get(k, k)
            fields.append(field_fmt % (label, val))
        # for all keys

        if print_errors and len(errors) > 0:
            fields.insert(0, error_fmt % ("ERRORS", str(errors)))

        if join_fields:
            # we are returning a single line, so only do Indent at beginning
            ret = Indent(indent) + join_fields.join(fields)
            if len(ret) > max_width:
                ret = "%.*s..." % (max_width-3, ret)
            return ret
        else:
            ret = []
            for f in fields:
                tmp = Indent(indent) + f
                if len(tmp) > max_width:
                    tmp = "%.*s..." % (max_width-3, tmp)
                ret.append(tmp)
            return join_lines.join(ret)
    # dump_fields

    def add_error(self, error):
        """Record an editorial error message on this object."""
        # Some subclasses define their own __init__ without calling ours,
        # so the list may not exist yet.
        if not hasattr(self, "_errors"):
            self._errors = []
        self._errors.append(error)

    def fmt_errors(self, fmt=None, indent=0, header=True):
        """Return the recorded errors as indented text.

        Args:
            fmt: format selector; only the default (None) is implemented.
            indent: nesting level of the "ERRORS:" header; the messages are
                indented one level deeper.
            header: whether to emit the "ERRORS:" header line.

        Returns:
            '' when there are no errors, otherwise the formatted lines
            joined by newlines.

        Raises:
            ValueError: when errors exist and an unsupported fmt is given.
        """
        errors = getattr(self, "_errors", None)
        if not errors:
            return ''

        if fmt:
            # The former ``assert("...")`` tested a non-empty string and
            # therefore could never fire.
            raise ValueError("Unknown error format type: %r" % (fmt,))

        ret = []
        if header:
            ret.append(Indent(indent) + "ERRORS:")
        for e in errors:
            ret.append(Indent(indent+1) + str(e))
        return "\n".join(ret)
    # fmt_errors
# class LuteMixIn

class Difficulty(LuteMixIn):
    """A named difficulty grade with a numeric level used for ordering."""
    _OBJECTS = {}

    def __init__(self, name=None, level=None):
        super().__init__()
        self.name = name
        self.level = level

    @classmethod
    def create_difficulties(kls):
        """Create the standard difficulty grades and register them by name.

        The classmethod previously lacked its class parameter, so calling
        it always raised TypeError, and the name/level values were dropped
        by the mix-in constructor.
        """
        diffs = {
            u"Beginner":     10,
            u"Easy":         20,
            u"Medium":       30,
            u"Challenge":    40,
            u"Difficult":    50,
            u"Virtuoso":     60,
            }
        for k,v in diffs.items():
            diff = kls(name=k, level=v)
            diff.save()
            kls.add(k, diff)
    # CreateDifficulties
# class Difficulty

class Key(LuteMixIn):
    """A musical key: display name plus the spelling used in the CSV data."""
    _OBJECTS = {}

    def __init__(self, name=None, csv_name=None):
        super().__init__()
        self.name = name
        self.csv_name = csv_name

    @classmethod
    def create_key_list(kls):
        """Create the known keys and register them under their CSV spelling.

        NOTE(review): the table relies on a module-level name ``flat`` (the
        text used for the flat sign) that is not defined in the part of the
        file reviewed here -- confirm it exists before calling this.
        """
        # Intended presentation order of the keys.
        # NOTE(review): not used by this method yet.
        key_order = [
            u"AM", u"Am", u"A#M", u"A#m", u"AbM", u"Abm",
            u"BM", u"Bm", u"B#M", u"B#m", u"BbM", u"Bbm",
            u"CM", u"Cm", u"C#M", u"C#m", u"CbM", u"Cbm",
            u"DM", u"Dm", u"D#M", u"D#m", u"DbM", u"Dbm",
            u"EM", u"Em", u"E#M", u"E#m", u"EbM", u"Ebm",
            u"FM", u"Fm", u"F#M", u"F#m", u"FbM", u"Fbm",
            u"GM", u"Gm", u"G#M", u"G#m", u"GbM", u"Gbm",
            ]
        # CSV spelling -> display name.  Convention visible in the table:
        # trailing 'M' = major (shown without qualifier), 'm' = minor.
        keys = {
        'Ab':   u"A"+flat,
        'AbM':  u"A"+flat,
        'Am':   u"A minor",
        'AM':   u"A",
        'Bbm':  u"B"+flat+" minor",
        'BbM':  u"B"+flat,
        'Bm':   u"B minor",
        'BM':   u"B",
        'Cm':   u"C minor",         # was "B minor"
        'CM':   u"C",
        'Dbm':  u"D"+flat+" minor",
        'DbM':  u"D"+flat,          # was "D flat minor"
        'Dm':   u"D minor",
        'DM':   u"D",
        'D major':  u"D",
        'Ebm':  u"E"+flat+" minor",
        'EbM':  u"E"+flat,
        'Em':   u"E minor",
        'EM':   u"E",
        'Fm':   u"F minor",
        'FM':   u"F",
        'Gm':   u"G minor",
        'GM':   u"G",
        }
        # where did this come from???
        # 'GM A minor, BbM for Bb major, C

        for k,v in keys.items():
            key = kls(name=v, csv_name=k)
            key.save()
            kls.add(k, key)
    # create_key_list
# class Key

class MusicType(LuteMixIn):
    """A type of music (e.g. a dance form) in a three level hierarchy.

    Top level entries are categories, their children are canonical type
    names, and the children of those are diplomatic (as-written) names.
    ``parent`` links an entry to the level above it; categories have none.
    """
    _OBJECTS = {}

    def __init__(self, name, official=False, parent=None):
        self.name = name
        self.official = official
        self.parent=parent

    @classmethod
    def _register_names(kls, line, parent):
        """Register each comma-separated name in line; return the first object."""
        first = None
        for name in line.split(','):
            (obj, created) = kls.get_or_create(name, official=True, parent=parent, save=True, commit=True)
            if first is None:
                first = obj
        return first

    @classmethod
    def create_from_typemap(kls, typemap_file):
        """Populate the registry from a type map file (read as ISO-8859-1).

        File format as handled by this parser:
            * lines starting with '#' (after optional whitespace) and blank
              lines are ignored;
            * a line ending in a backslash is continued on the next line;
            * a line holds one or more comma-separated names;
            * no leading tab     -> category names,
              one leading tab    -> canonical names under the current category,
              two leading tabs   -> diplomatic names under the current
                                    canonical name.
        When a line lists several names, the FIRST one becomes the parent of
        the entries on the following, deeper lines.
        """
        # This is a bit KLUDGEy

        lines = []
        with open(typemap_file, encoding="ISO-8859-1") as typemap:
            for line in typemap:
                if re.match(r'\s*#', line) or re.match(r"^\s*$", line):
                    continue
                while re.search(r'\s*\\\s*$', line):
                    line = re.sub(r'\s*\\\s*$', r'', line)
                    # A default keeps a continuation mark on the very last
                    # line from escaping as StopIteration.
                    tmp = next(typemap, '')
                    line += tmp.strip(" \t")
                line = re.sub(r'[,\s]*$', r'', line)
                lines.append(line)
        #with

        cat = canon = None
        for line in lines:
            # a continuation can splice a comment line in, so check again
            if re.match(r'\s*#', line):
                continue
            line = re.sub(r",\s", r',', line)
            if line.startswith("\t\t"):
                line = re.sub(r"^[\s\t]*", r'', line)
                kls._register_names(line, canon)
            elif line.startswith("\t"):
                line = re.sub(r"^[\s\t]*", r'', line)
                canon = kls._register_names(line, cat)
            else:
                line = re.sub(r"^[\s\t]*", r'', line)
                cat = kls._register_names(line, None)
        # for each line

        #kls.dump_tree()
    # create_from_typemap

    @classmethod
    def dump_tree(kls, tree=False):
        """Print the registered types.

        Args:
            tree: False (default) prints a flat "[key] [name]" listing, as
                this method always did.  True prints the category /
                canonical / diplomatic hierarchy instead (code that used to
                sit unreachable behind an early return).
        """
        all = kls.all()
        if not tree:
            for name,mt in all.items():
                print("[%s] [%s]" % (name, mt.name))
            return

        tlts = [v for v in all.values() if v.parent is None]
        for tlt in tlts:
            print("Top: ", tlt.name)
            canons = [v for v in all.values() if v.parent == tlt]
            for c in canons:
                print(Indent(1), "Canonical:", c.name)
                ps = [v for v in all.values() if v.parent == c]
                if ps:
                    print(Indent(2), "Diplomatics:")
                    for p in ps:
                        print(Indent(3), p.name)
    # dump_tree


    def dump(self):
        """Return the name, followed by the parent's name when there is one."""
        tmp = [self.name]
        if self.parent: tmp.append("parent: %s" % self.parent.name)
        #if self.official: tmp.append("official: True")
        return "\n\t".join(tmp)
# class MusicType

class Instrument():
    """An instrument (or voice) with a short name, full name and sort code.

    ``mpNameInst`` is the class-wide registry, keyed by short name.
    ``order`` is a string code; instruments sort by it, and a leading digit
    marks an entry that carries its own group-size digit (see
    order_from_instruments).
    """
    mpNameInst = {}
    _id = 0

    def __init__(self, name, fullname, order):
        self.name = name
        self.fullname = fullname
        self.order = order

    @classmethod
    def add(kls, inst=None, name=None, fullname=None, order=None):
        """Register an instrument and return it.

        Either pass a ready-made inst, or name/fullname/order from which a
        new instrument with a fresh ``id`` is built.  A missing order
        defaults to "ZZZZZBOGUS" so that unknown instruments sort last.
        """
        if not inst:
            if not order:
                order = "ZZZZZBOGUS"
            inst = kls(name=name, fullname=fullname, order=order)
            kls._id += 1
            inst.id = kls._id
        kls.mpNameInst[inst.name] = inst
        return inst

    @classmethod
    def create_from_file(kls, filename):
        """Flesh out the instrument list from an instrument-order file.

        Line formats handled:
            ``#=CODE``        set the order code for the following entries
                              (allows gaps for future instruments)
            ``# ...``         comment line
            ``name``          instrument whose full name equals its name
            ``name | full``   instrument with a separate full name
        Text after a '#' on an entry line is a comment.  Each registered
        entry advances the order code with IncrementString().

        NOTE(review): the initial order is the integer 1 while the rest of
        the class treats order codes as strings; files are presumably
        expected to start with a ``#=`` line -- confirm.
        """
        with open(filename) as fh:
            instruments = fh.read().splitlines()

        order = 1
        for name in instruments:
            if name == '':
                continue
            if name.startswith('#='):
                # This line sets the ordering to a new value
                # This allows us to leave gaps for future instruments
                order = name[2:].strip()
                continue

            if name.startswith('#'):
                continue
            # strip EOL comments and the whitespace they leave behind
            name = re.sub(r'#.*$', '', name).strip()
            if not name:
                continue

            if '|' in name:
                # tolerate any spacing around the bar
                name, fullname = [s.strip() for s in name.split('|', 1)]
            else:
                fullname = name
            kls.add(name=name, fullname=fullname, order=order)
            order = IncrementString(order)
    # createInstruments

    @classmethod
    def order_from_instruments(kls, instsIn):
        """Return the sort code for a group of instruments.

        Single instrument ordering is based on scanner/INSTRUMENTS.alpha.
        Groups (including a group of 1) have their instrument count
        prepended to a concatenation of the individual instrument ordering
        codes.  Mostly, this puts single instruments at the top and large
        groups of multiple instruments at the bottom, but the score is
        always last.

        Falsy entries in instsIn are skipped; instruments not yet in the
        registry are added with the default (last-sorting) order code.
        """
        ch_num_insts = None
        tmp = []
        for i in instsIn:
            if not i:
                continue
            if i.name not in kls.mpNameInst:
                # unknown instrument!
                kls.add(name=i.name, fullname=i.name)
            order = kls.mpNameInst[i.name].order
            if order[0] in '0123456789':
                # instrument (probably the score) has its own number
                if ch_num_insts is None or order[0] > ch_num_insts:
                    ch_num_insts = order[0]
                order = order[1:] # clip it off
            tmp.append(order)

        tmp.sort()
        if ch_num_insts is None:
            ch_num_insts = chr(ord('0') + len(tmp)) # yields '1' through '9'
        return ch_num_insts + "".join(tmp)
    # orderFromInstruments

    @classmethod
    def clean_instrument(kls, sIn):
        """Return the normalised form of one raw instrument name.

        Surrounding whitespace is removed (the result of strip() used to be
        thrown away), the text is lower-cased, single letters that follow a
        space are upper-cased again, runs of whitespace collapse to one
        space, and a trailing number and a "| duet/ground/treble" qualifier
        are removed.
        """
        sOut = sIn.strip()
        sOut = sOut.lower()
        sOut = re.sub(r"( [a-z]\b)", lambda pat: pat.group(0).upper(), sOut)
        sOut = reSpaces.sub(' ', sOut)
        sOut = reNum.sub('', sOut)
        sOut = reOther.sub('', sOut)
        return sOut
    # clean_instrument

    @classmethod
    def get_instruments(kls, sIn):
        """Return the instruments named in sIn, sorted by order code.

        sIn is split on '.' / ',' separators; each piece is normalised with
        clean_instrument() and looked up in the registry.  Unknown names are
        registered on the fly with a "ZZBOGUS<id>" order code.
        """
        ret = []
        for raw in reSep.split(sIn):
            iname = kls.clean_instrument(raw)
            inst = kls.mpNameInst.get(iname)
            if inst is None:
                inst = kls.add(name=iname)
                inst.fullname = iname
                inst.order = "ZZBOGUS" + str(inst.id)
            ret.append(inst)
        ret.sort(key=lambda a: a.order)
        return ret
    # get_instruments

    def __repr__(self):
        if self.name == self.fullname:
            return self.name
        else:
            return "%s [%s]" % (self.name, self.fullname)
#class Instrument

class Country(LuteMixIn):
    """A country, identified by its name."""
    _OBJECTS = {}   # registry used by the LuteMixIn class helpers

    def __init__(self, name):
        # Note: the mix-in __init__ is not called, so instances start
        # without an _errors list.
        self.name = name

    def __str__(self):
        return self.name

# class Country


class ComposerAlias(LuteMixIn):
    """An alternative name for a composer, linked back to that composer."""
    # NOTE: potential m2m
    _OBJECTS = {}   # registry used by the LuteMixIn class helpers

    def __init__(self, name, composer):
        # name: the alias text; composer: the object this alias refers to.
        self.name = name
        self.composer = composer

    def __str__(self):
        return self.name
# class ComposerAlias


class Composer(LuteMixIn):
    """A composer with name parts, a sort name, countries and aliases."""
    # NOTE: potential m2m
    _OBJECTS = {}

    #full_name = models.CharField(max_length=254)
    #first_name = models.CharField(max_length=254)
    #last_name = models.CharField(max_length=254)
    #sort_name = models.CharField(max_length=254, blank=True) # this is the field we sort on
    #suspicious = models.BooleanField(default=False)
    #aliases = models.ManyToManyField(ComposerAlias, blank=True)
    #country = models.ForeignKey(Country, blank=True, null=True)
    #country_origin = models.ForeignKey(Country, blank=True, related_name="%(app_label)s_%(class)s_related_origin", null=True)
    #date_of_birth = models.DateField(blank=True, null=True)
    #date_of_death = models.DateField(blank=True, null=True)
    #url_info = models.CharField(max_length=254, blank=True)
    #html = models.TextField(blank=True)

    def __init__(self, full_name=None, first_name=None, last_name=None, country=None, country_origin=None):
        """Create a composer.

        country and country_origin are given as names; non-empty names are
        replaced by the shared Country object of that name.  sort_name is
        the last name passed through latin1_to_ascii().
        """
        self.full_name = full_name
        self.last_name = last_name
        # Composers created from a bare name (no last name) must not push
        # None through the transliteration helper.
        if self.last_name:
            self.sort_name = latin1_to_ascii(self.last_name)
        else:
            self.sort_name = self.last_name
        self.first_name = first_name
        self.country = country
        self.country_origin = country_origin
        # save=True stores the Country so that later lookups of the same
        # name return the same object instead of a fresh duplicate.
        if self.country:
            (self.country, created) = Country.get_or_create(self.country, save=True)
        if self.country_origin:
            # This result used to be assigned to self.country by mistake.
            (self.country_origin, created_origin) = Country.get_or_create(self.country_origin, save=True)
        self.aliases = []
    # __init__

    @classmethod
    def create_from_file(kls, csv_file):
        """Load composers from a tab-separated file (read as ISO-8859-1).

        Columns: full name, "Last, First", country, then any number of
        aliases.  Every composer is registered under its full name.  A
        missing or empty country becomes 'Unknown'; blank rows and the
        legacy header row are skipped.
        """
        with open(csv_file, encoding="ISO-8859-1") as csvfile:
            reader = csv.reader(csvfile, delimiter="\t")

            for cdata in reader:
                if not cdata:
                    continue

                full_name = str(cdata[0])
                # Read this column BEFORE the header test that uses it.
                last_first = str(cdata[1]) if len(cdata) > 1 else ''
                if full_name == "Composer" and last_first == "Last, First":
                    # this is a bogus header line left over from the Reagan era.
                    continue

                country_name = 'Unknown'
                if len(cdata) > 2 and cdata[2] != '':
                    country_name = str(cdata[2])

                tmp = re.split(r'\s*,\s*', last_first)
                last = tmp[0]
                first = tmp[1] if len(tmp) > 1 else ''

                comp = Composer(full_name=full_name,
                            first_name=first, last_name=last,
                            country=country_name, country_origin=country_name)
                kls.add(comp.full_name, comp)

                # If more than 3 fields, the non-empty ones are aliases
                for a in cdata[3:]:
                    if a != '':
                        comp.aliases.append(ComposerAlias(a, comp))
        # with
    # CreateComposers

    def save(self, *args, **kwargs):
        if not self.sort_name:
            self.sort_name = self.last_name
        super(Composer, self).save(*args, **kwargs)

    def name(self):
        """Return "First Last", whichever part exists, or 'No Name'."""
        name = ''
        if self.first_name:
            if self.last_name:
                name = "%s %s" % (self.first_name, self.last_name)
            else:
                name = self.first_name
        elif self.last_name:
            name = self.last_name
        else:
            name = 'No Name'
        # smart_unicode() was a Django / Python 2 helper that is not
        # available here; text is already unicode in Python 3.
        return str(name)
    # name

    def __unicode__(self):
        return self.name()

    @classmethod
    def composer_list(cls):
        """Return ``[(initial, [composers and aliases...]), ...]`` sorted by
        initial and, within each group, by sort_name.

        NOTE(review): the data access in the first half (``Composer.objects``,
        ``piece_set``, ``aliases.all()``, alias ``sort_name``) is still
        written against the Django ORM and has no counterpart in the
        in-memory classes visible in this file -- needs porting before use.
        Only the Python 3 incompatible sorting has been fixed.
        """
        comp_all = Composer.objects.order_by('sort_name')

        tmp = []
        for c in comp_all:
            if not c.piece_set.count():
                continue
            if c.sort_name:
                c.ch_sort = c.sort_name[0].upper()
                tmp.append(c)

                for a in c.aliases.all():
                    if a.sort_name:
                        a.ch_sort = a.sort_name[0].upper()
                        a.composer = c
                        tmp.append(a)

        mpChComposer = {}
        for c in tmp:
            mpChComposer.setdefault(c.ch_sort, []).append(c)

        # Finally we sort the sort characters, and then each list under that char
        comp_list = []
        for ch in sorted(mpChComposer):
            mpChComposer[ch].sort(key=lambda a: a.sort_name)
            comp_list.append( (ch, mpChComposer[ch]) )

        return comp_list
    # composer_list

    def dump(self):
        """Print the name fields and country, then one indented line per alias."""
        # country may be a Country, an empty string or None
        country = getattr(self.country, 'name', self.country)
        print("[%s] [%s] [%s] [%s] [%s]" % (self.full_name, self.first_name, self.last_name, self.sort_name, country))
        if self.aliases:
            for a in self.aliases:
                print(Indent(1), str(a))

    @classmethod
    def dump_all(kls):
        for (k,v) in kls.all(sorted=True):
            v.dump()

    def __str__(self):
        return self.full_name

    class Meta:
        ordering = ['last_name']
# class Composer


class Piece(LuteMixIn):
    """The abstract piece of music that one or more Settings realise."""
    _OBJECTS = {}
    _sorted = False

    # Signature: (??) title || composer
    def __init__(self, settings=None, key=None):
        """Create a piece and register it.

        Args:
            settings: optional initial list of settings.
            key: registry key.  The registration call used to omit the key
                altogether and therefore always raised TypeError.
                NOTE(review): the real signature of a Piece is still
                undecided (see the comment above); until then the object's
                id() serves as a unique placeholder key.
        """
        super().__init__()
        self.settings = settings
        Piece.add(id(self) if key is None else key, self)

    def add_setting(self, setting):
        if not self.settings:
            self.settings = []
        self.settings.append(setting)

    def dump(self, indent=0, depth=MAXINT, flags="", extra_flags=""):
        """Not implemented yet."""
        # replaces the former ``1/0`` placeholder with a meaningful error
        raise NotImplementedError("Piece.dump() is not implemented")

# class Piece

class Part(LuteMixIn):
    """One .ft3 file, i.e. one row of the dft.tsv file.

    The constructor copies the row into attributes (named via
    field_name_map), normalises file paths, derives the ornamented /
    performance flags, the instrument list, the section order and the
    grouping signature, and finally links composers.
    """
    _OBJECTS = {}
    _sorted = False

    # short flag -> attribute name, for the dump() helpers
    abbrev_map = {
        "ar": "arranger", "c0": "composer0", "cm": "composer",
        "cn": "contributor", "da": "date", "di": "difficulty",
        "do": "document", "ed": "editor", "ec": "encoder",
        "es": "ensemble", "fn": "footnote", "ff": "fronimo_file",
        "in": "intabulator", "insts": "instruments", "ky": "key",
        "md": "midi_file", "os": "orig_sub", "pg": "page",
        "pt": "part", "pf": "pdf_file", "pi": "piece",
        "pu": "publisher", "rm": "remarks", "sc": "section",
        "su": "subtitle", "ti": "title", "ty": "type",
    }
    # column header in the file -> attribute name
    field_name_map = collections.OrderedDict([
        ("Piece", "piece"), ("Section", "section"), ("Part", "part"),
        ("Ensemble", "ensemble"),
        ("Title", "title"), ("Orig. subtitle", "orig_sub"), ("Subtitle", "subtitle"),
        ("Type", "type"), ("Key", "key"), ("Difficulty", "difficulty"),
        ("Composer", "composer"), ("Orig. composer", "composer0"),
        ("Publisher", "publisher"),
        ("Document", "document"), ("Page", "page"),
        ("Editor", "editor"), ("Encoder", "encoder"),
        ("Arranger", "arranger"), ("Intabulator", "intabulator"),
        ("Contributor", "contributor"),
        ("Remarks", "remarks"), ("Footnote", "footnote"),
        ("Date", "date"),
        ("Fronimo", "fronimo_file"), ("PDF", "pdf_file"),
        ("Midi", "midi_file"),
        ])
    # attribute name -> column header; built on first instantiation
    field_name_reverse_map = None

    def __init__(self, attrs):
        """Convert the dictionary into instance attributes.

        Args:
            attrs: mapping of column header -> value for one row.  Every
                key must be a header listed in field_name_map (KeyError
                otherwise).  None values (short rows) are stored as ''.
        """
        super().__init__()
        for k, v in attrs.items():
            field_name = self.field_name_map[k]
            setattr(self, field_name, '' if v is None else v)

        # For pretty printing
        if not Part.field_name_reverse_map:
            Part.field_name_reverse_map = {v: k for k, v in Part.field_name_map.items()}

        # The field "Piece" is from an earlier time and does **not** equate
        # with the current meaning of Piece as an abstraction including all
        # Settings() that realize a Piece of music.
        if self.title == '':
            self.title = self.piece
        if self.part == '':
            self.part = self.ensemble

        # Normalise the paths BEFORE looking at the file name flags: the
        # flag patterns use '/' to confine the match to the last path
        # component, which only works once backslashes are gone.
        self.fronimo_file = self._normalize_path(self.fronimo_file)
        self.midi_file = self._normalize_path(self.midi_file)
        self.pdf_file = self._normalize_path(self.pdf_file)

        # _O = Ornamented, _P = Performance (in the file name)
        self.ornamented = (re.search(r'^.*_[^/]*O[^/a-z]*\.ft3$', self.fronimo_file) is not None)
        self.performance = (re.search(r'^.*_[^/]*P[^/a-z]*\.ft3$', self.fronimo_file) is not None)

        self.set_instruments()
        self._set_section_order()

        # Build the signature for grouping related files/parts together
        # The signature wants to:
        #   a: be as unique as possible
        #   b: without accidentally excluding things that belong together
        # After a bunch of playing around it was determined that the
        # "best" signature was based on the originating document + the date.

        self.signature = "%s / %s / %s / %s" % (self.publisher, self.document, self.page, self.date)

        # Now establish our foreign keys
        if self.composer == '':
            self.composer = self.composer0
        if self.composer:
            # created == True should not happen!
            (self.composer, created) = Composer.get_or_create(self.composer)
        if self.composer0:
            # created == True should not happen!
            (self.composer0, created) = Composer.get_or_create(self.composer0)
        """
        if self.type:
            (self.type, created) = MusicType.get_or_create(self.type)
            if created:
                # this should not happen!
                pass
        """
    # __init__

    @staticmethod
    def _normalize_path(path):
        """Return path with forward slashes and the site prefixes removed."""
        path = path.replace("\\", "/")
        path = path.replace("C:/website/", "")
        path = path.replace("http://gerbode.net/", "")
        return path

    def _set_section_order(self):
        """Derive section_order from the section name.

        '*' means the complete work (order -1, renamed 'Complete'); a name
        of the form "<number>. <text>" or a bare number yields that number;
        anything else leaves the order at 0 and records an error.
        """
        self.section_order = 0
        if not self.section:
            return
        if self.section == "*":
            self.section_order = -1
            self.section = 'Complete'
            return

        m = re.match(r'^\s*(([0-9]+)\.)?\s+.*$', self.section)
        if m is None:
            # not "<number>. <text>"; a bare number is still acceptable
            try:
                self.section_order = int(self.section)
            except ValueError:
                self.add_error("Bad Section name: %s" % self.section)
        elif m.group(2):
            self.section_order = int(m.group(2))
        else:
            self.add_error("Section name has no number: %s" % self.section)

    def set_instruments(self):
        """Resolve the part's instruments and check them against the ensemble.

        Sets ``instruments`` and ``order``.  Unless the part is the score,
        every instrument of the part that is missing from the ensemble
        field is recorded as an error.
        """
        self.instruments = Instrument.get_instruments(self.part)
        self.order = Instrument.order_from_instruments(self.instruments)

        # the score is not expected to show up in the ensemble
        if self.instruments and self.instruments[0].fullname == 'score':
            return

        # sanity check that instruments is a subset of ensemble
        ensem = set(e.fullname for e in Instrument.get_instruments(self.ensemble))
        for i in self.instruments:
            if i.fullname not in ensem:
                self.add_error("'%s' in part but not in ensemble" % i.fullname)
    # set_instruments

    def ensemble_encode(self):
        """Store the encoded ensemble in ``ensemble_sort``.

        Previously read the non-existent attribute ``insts`` and wrote to
        the misspelt ``enemble_sort``.
        """
        self.ensemble_sort = EnsembleEncode(self.ensemble)

    def get_title(self):
        return self.title

    def get_long_title(self):
        """Return "title / subtitle / section" with '--' for missing parts."""
        t = self.title
        if not t: t = '--'
        s = self.subtitle
        if not s: s = '--'
        sect = self.section
        if not sect: sect = '--'
        ret = "%s / %s / %s" % (t, s, sect)
        return ret

    def dump(self, indent=0, depth=MAXINT, flags="es ff", extra_flags="", ignore_empty=False, errors_only=False):
        """Return the fields selected by flags as text.

        flags and extra_flags are space separated field abbreviations (see
        abbrev_map) or attribute names.  Returns '' once depth is used up,
        and None when errors_only is set and the part has no errors.
        """
        depth -= 1
        if depth <=0:
            return ''

        if extra_flags:
            flags += " " + extra_flags

        return self.dump_fields(keys=flags.split(), indent=indent, depth=depth, ignore_empty=ignore_empty, errors_only=errors_only)
    # dump()

# class Part

class Section(LuteMixIn):
    """An identified sub-portion of a Setting, made of one or more Parts.

    Attributes:
        parts         -- list of Parts in this section (None until populated).
        title         -- section title; filled in lazily by set_title().
        section_order -- ordering key taken from the first part (0 if none).
    """

    def __init__(self, parts=None):
        super().__init__()
        self.parts = parts
        self.title = None
        # Take the ordering key from the first part so that a Section built
        # directly from a list of parts carries the same key as one that was
        # populated through add().
        self.section_order = parts[0].section_order if parts else 0

    def set_title(self):
        """Adopt the ``section`` value of the first part as title, unless one is set."""
        if self.title:
            return
        # KLUDGE
        if self.parts:
            self.title = self.parts[0].section

    def add(self, part):
        """Append a part, re-sort the parts, and refresh section_order."""
        # Function-scope import keeps this block self-contained.
        from functools import cmp_to_key

        if not self.parts:
            self.parts = []
        self.parts.append(part)
        # list.sort() accepts keyword arguments only on Python 3, so a
        # comparator has to be wrapped.  Assumes CmpEnsemble is a
        # two-argument cmp-style function -- TODO confirm.
        self.parts.sort(key=cmp_to_key(CmpEnsemble))
        self.section_order = self.parts[0].section_order

    def dump(self, indent=0, depth=MAXINT, flags="", extra_flags="", ignore_empty=False, errors_only=False):
        """Return the section title line followed by the dump of each part.

        Returns None when errors_only is true and no part produced output.
        """
        lines = []
        depth -= 1
        if depth > 0:
            # A Section created without parts has parts=None.
            for p in self.parts or ():
                text = p.dump(indent+1, depth, flags=flags, extra_flags=extra_flags,
                              ignore_empty=ignore_empty, errors_only=errors_only)
                if text:
                    lines.append(text)
        if errors_only and not lines:
            return None
        lines.insert(0, "%s%s:" % (Indent(indent), self.title))
        return "\n".join(lines)
# class Section

class Setting(LuteMixIn):
    """One concrete arrangement of a piece: all Parts that share a signature.

    A Setting holds either unassociated Parts or Sections (which in turn hold
    Parts).  Having both at once is flagged as an error by analyze_sections().
    """
    _OBJECTS = {}   # KLUDGE!  This is our "database" for the moment
    _sorted = False

    def __init__(self, signature, parts=None, sections=None):
        super().__init__()
        self.title = None
        self.signature = signature
        self.parts = parts
        self.sections = sections
        self.set_title()

    def set_title(self):
        """Adopt title and ensemble from the first available part, unless a title is set."""
        if self.title:
            return
        # KLUDGE
        parts = self.all_parts()
        if parts:
            self.title = parts[0].title
            self.ensemble = parts[0].ensemble

    def add_section(self, section):
        """Append a Section and make sure it has a title."""
        if not self.sections:
            self.sections = []
        self.sections.append(section)
        section.set_title()

    def add_part(self, part):
        """Append an unassociated Part to this Setting."""
        if not self.parts:
            self.parts = []
        self.parts.append(part)

    def analyze_parts(self):
        """Currently a no-op: the per-part error roll-up below is disabled."""
        return
        # Deliberately unreachable; kept so the roll-up can be re-enabled.
        for p in self.all_parts():
            if p._errors:
                self.add_error("%s || %s" % (p.fmt_errors(header=False), p.fronimo_file))

    def analyze_sections(self):
        """Move parts that carry a section_order into Sections.

        Parts with a falsy section_order stay in self.parts.  If both kinds
        are present, an error is recorded on the Setting.
        """
        sects = collections.defaultdict(list)
        _parts = self.parts or []   # parts may still be None
        self.parts = []
        for p in _parts:
            if p.section_order:
                sects[p.section_order].append(p)
            else:
                self.parts.append(p)
        if sects:
            if self.parts:
                self.add_error("Setting has both Sections and unassociated Parts:\n\t%s (%s)" % (self.title, self.signature))
            for order, parts in sects.items():
                section = Section(parts)
                # dump() sorts sections on section_order, so record the key
                # the parts were grouped by.
                section.section_order = order
                self.add_section(section)
                if section._errors:
                    self.add_error("%s || %s" % (section.title, section.fmt_errors(header=False)))
    # analyze_sections

    def analyze(self):
        """Run all analysis passes on this Setting."""
        self.analyze_parts()
        self.analyze_sections()

    def all_parts(self):
        """Return a flat list of every part: those in sections first, then loose ones."""
        parts = []
        # Note: the following allows for there to be both
        # Sections() with parts *and* unassociated parts,
        # a situation that should never happen.
        if self.sections:
            for s in self.sections:
                parts.extend(s.parts or [])
        if self.parts:
            parts.extend(self.parts)
        return parts

    def titles(self, min_len=1, clean_cruft=False):
        """Return a dict of (title, parts[]), or None.

        Sections contribute only when their own parts disagree on the title;
        loose parts always contribute.  With clean_cruft=True, None is returned
        when all titles reduce to fewer than two distinct simplified titles.
        None is also returned when fewer than min_len titles were collected.
        """
        titles = collections.defaultdict(list)

        if self.sections:
            for s in self.sections:
                s_titles = collections.defaultdict(list)
                for p in s.parts:
                    s_titles[p.title].append(p)
                if len(s_titles) > 1:
                    for st, ps in s_titles.items():
                        titles[st] = ps

        if self.parts:
            for p in self.parts:
                titles[p.title].append(p)

        if clean_cruft:
            # Two special cases where titles that are not perfect matches
            # are considered to be a match:
            #   1. Title starts with 18., 18a., 18b., etc. and all can reduce to 18.
            #   2. Differ only in that they have a trailing (parenthetical note)
            simplified = collections.defaultdict(list)
            for title in sorted(titles.keys()):
                # Normalize leading number variants
                t = re.sub(r'^\s*([0-9]+)[a-z]*\.(.*)$', r'\1. \2', title)
                # Remove trailing parens
                t = re.sub(r'\s*\(.*\)\s*$', r'', t)
                simplified[t].append(title)    # titles that map to this simpler title

            if len(simplified) < 2:
                # then all titles were *essentially* the same
                return None
        # number & paren removal

        if len(titles) < min_len:
            return None

        return titles
    # titles

    def ensembles(self):
        """Return a dict of (ensemble_name, parts[]) over all parts."""
        ret = collections.defaultdict(list)
        for p in self.all_parts():
            ret[p.ensemble].append(p)
        return ret

    def dump(self, indent=0, depth=MAXINT, flags="", extra_flags="", ignore_empty=False, errors_only=False):
        """Return a text rendering of this Setting, its sections, parts and errors.

        Sections are sorted in place by section_order before rendering.
        Returns None when errors_only is true and nothing was rendered.
        """
        ret = []
        depth -= 1
        if depth > 0:
            if self.sections:
                self.sections.sort(key=lambda x: x.section_order)
                for s in self.sections:
                    tmp = s.dump(indent=indent+2, depth=depth, flags=flags, extra_flags=extra_flags, ignore_empty=ignore_empty, errors_only=errors_only)
                    if tmp:
                        ret.append(tmp)
                if errors_only and len(ret) > 0:
                    ret.insert(0, "%sSections:" % Indent(indent+1))

            if self.parts:
                parts = []
                if self.sections:
                    if 'sc' not in (flags + extra_flags):
                        extra_flags += " sc"
                for p in self.parts:
                    tmp = p.dump(indent=indent+2, depth=depth, flags=flags, extra_flags=extra_flags, ignore_empty=ignore_empty, errors_only=errors_only)
                    if not tmp:
                        continue
                    # Separator between consecutive rendered parts.
                    if parts:
                        parts.append(Indent(indent+2) + "====================================================")
                    parts.append(tmp)

                if self.sections and parts:
                    parts.insert(0, "%sParts:" % Indent(indent+1))

                if parts:
                    ret.extend(parts)

            if self._errors:
                ret.append(self.fmt_errors(indent=indent+1))

        if errors_only and len(ret) == 0:
            return None
        ret.insert(0, "%s%s (%s):" % (Indent(indent), self.title, self.signature))
        return "\n".join(ret)
    # dump

    @classmethod
    def create_from_csv(kls, csv_file):
        """
        Master ingest routine converting the output of dft.pl (aka dft.tsv)
        to Parts(), and then grouping Parts() into Settings().

        Every row of the tab-separated file becomes a Part().  Parts are
        grouped by their signature, one Setting() is built and analyzed per
        signature, and each one is handed to Setting.add(sig, setting).

        Nothing is returned.
        """

        mpSigParts = collections.defaultdict(list)
        # Stray "\lang123" markers are stripped from every value.
        lang_marker = re.compile(r'\\lang[0-9]+\s?')

        with open(csv_file, encoding="ISO-8859-1") as csvfile:
            reader = csv.DictReader(csvfile, delimiter="\t")

            for i, part_dict in enumerate(reader):
                cleaned = {}
                for k, v in part_dict.items():
                    if k is None:
                        # surplus columns beyond the header are dropped
                        continue
                    if v is None:
                        # short row: report it and treat the field as empty
                        print(i, k, v)
                        v = ''
                    cleaned[k] = lang_marker.sub('', v)
                part = Part(cleaned) # pass cleaned up dict
                mpSigParts[part.signature].append(part)
            # for
        # with

        # Now that all of the parts are grouped together
        # we need to analyze them into their separate settings
        # and (possibly) sections-within-setting,
        for sig, parts in mpSigParts.items():
            setting = Setting(sig, parts)
            setting.analyze()
            Setting.add(sig, setting)
    # create_from_csv

# class Setting

def PrintSetting(setting, flags="*", ignore_empty=True, errors_only=False):
    """Print the dump of one setting followed by a blank line.

    Nothing is printed when setting.dump() returns an empty or None result.
    """
    text = setting.dump(flags=flags, ignore_empty=ignore_empty, errors_only=errors_only)
    if not text:
        return
    print(text, end="\n\n")
# PrintSetting

def PrintSettings(**kwargs):
    """Print every setting yielded by Setting.all(sorted=True).

    Keyword arguments are passed through unchanged to PrintSetting(), e.g.
    flags="pt ff errors", ignore_empty=True, errors_only=False.
    """
    for _sig, setting in Setting.all(sorted=True):
        PrintSetting(setting, **kwargs)
# PrintSettings

def AnalyzeSettingDirectories(ignore_key=False):
    """Report every setting whose parts live in more than one directory.

    Directories ending in a key-like component (.../Dm/ vs. .../Am/) are
    grouped by that key.  With ignore_key=True the per-key listing is
    suppressed, and a setting whose directories all carry such a component
    is skipped entirely.
    """
    for _sig, setting in Setting.all(sorted=True):
        # Group parts by the directory portion of their file name.
        by_dir = collections.defaultdict(list)
        for part in setting.all_parts():
            folder = re.sub(r'[^/]*$', r'', part.fronimo_file)
            by_dir[folder].append(part)

        if len(by_dir) <= 1:
            continue

        # Sometimes we have the same setting in more than 1 key; split the
        # directories into those that name a key and those that do not.
        keyed = collections.defaultdict(list)
        unkeyed = []
        for folder, members in by_dir.items():
            found = re.search(r'[_/]([A-Z]b?[mM])/$', folder)
            if found:
                keyed[found.group(1)].append(members)
            else:
                unkeyed.append(members)

        header_printed = False

        if keyed:
            if ignore_key and not unkeyed:
                continue

            if not ignore_key:
                header_printed = True
                print("%s (%s)\n" % (setting.title, setting.signature))
                print("\tKeys")
                for key_name, groups in keyed.items():
                    print("\t    %s:" % key_name)
                    for group in groups:
                        for p in group:
                            print("\t\t%s: %s" % (p.key, p.fronimo_file))
                print()

            if unkeyed:
                if not header_printed:
                    header_printed = True
                    print("%s (%s)\n" % (setting.title, setting.signature))
                print("\tNo key in filename")
                for group in unkeyed:
                    for p in group:
                        print("\t    %s: %s" % (p.key, p.fronimo_file))
                print()

        if not header_printed:
            print("%s (%s)\n" % (setting.title, setting.signature))

        # Full per-directory listing.
        for folder, members in by_dir.items():
            listing = "\n\t    ".join("%s: %s" % (p.key, p.fronimo_file) for p in members)
            print("\t%s\n\t    %s" % (folder, listing))
        print()
        print("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
    # for all settings
# AnalyzeSettingDirectories


def AnalyzeSettingComposers():
    """Report every setting whose parts disagree on composer or composer0.

    For each such setting the conflicting values are listed with the files
    that carry them, followed by a dump of the setting itself.
    """
    settings = Setting.all()
    for sig in sorted(settings.keys()):
        setting = settings[sig]
        by_composer = collections.defaultdict(list)
        by_composer0 = collections.defaultdict(list)
        for part in setting.all_parts():
            by_composer[part.composer].append(part)
            by_composer0[part.composer0].append(part)

        # Keep only the groupings that actually show a conflict.
        conflicts = [
            (label, groups)
            for label, groups in (("Multiple Composers", by_composer),
                                  ("Multiple Composer0s", by_composer0))
            if len(groups) > 1
        ]
        if not conflicts:
            continue

        for label, groups in conflicts:
            print(label)
            for name, members in groups.items():
                files = "\n\t\t".join(m.fronimo_file for m in members)
                print("\t[%s]\n\t\t%s" % (name, files))
            print()

        print("Setting:")
        PrintSetting(setting, flags="cm c0 ff", ignore_empty=True, errors_only=False)
        print("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n\n")
    # for all settings
# AnalyzeSettingComposers


def DoMain():
    """Load the reference tables, ingest the parts file, and print all settings."""
    # Instruments, Composers, and MusicTypes are created first because
    # Parts hold foreign key refs to them.
    MusicType.create_from_typemap("typemap.txt")
    Composer.create_from_file("compdata.tsv")
    Instrument.create_from_file("instrument_order.txt")

    # Alternative input for testing: "dft_test.tsv"
    Setting.create_from_csv("dft_py.tsv")

    # Optional reports; enable as needed:
    #AnalyzeSettingComposers()
    #AnalyzeSettingDirectories(ignore_key=False)
    #PrintSettings(flags="cm c0 pt ff errors", errors_only=False)

    PrintSettings(flags="* errors", ignore_empty=True)


# Run the ingest-and-report pipeline only when this file is executed as a script.
if __name__ == "__main__":
    DoMain()