"""
studentlink.py - read fields for student data, clean the input,
and write to an output file.

Assumptions:
1) Every file has an ID field to identify each student.
   The files contain different sorts of information on students,
   all keyed by ID number.
2) The ID field is always the first column.

SYNOPSIS
    studentlink.py filelist outfile

"""

import re
import sys

#-------------------- Parameters ----------------------
"""
    Wrapper for command line parameters.
"""

class Parameters :
    """
    Wrapper for command line parameters.

    Attributes:
        listfile: name of the file that lists the input files.
        outfile:  name of the output file.
        SEP:      field separator used when splitting input lines.
    """

    USAGE = "usage: studentlink.py filelist outfile"

    def __init__(self):
        self.listfile = ""
        self.outfile = ""
        # Hard-coded for now; this could be read as a command line
        # parameter instead.
        self.SEP = ","

    def read_args(self, argv=None):
        """
        Fill in listfile and outfile from the command line.

        argv defaults to sys.argv; argv[1] is the list file and argv[2]
        the output file. Any further arguments are ignored.

        Exits with a usage message (SystemExit) when fewer than two
        arguments are supplied, instead of failing with an IndexError.
        """
        # For a production script, this code should be re-written
        # using the Python argparse class.
        if argv is None:
            argv = sys.argv
        if len(argv) < 3:
            sys.exit(self.USAGE)
        self.listfile = argv[1]
        self.outfile = argv[2]


#------------------------- Filelist ------------------
class FileList :
    """
    List of files to be read.

    Attributes:
        filenames: the file names collected so far, in the order read.
    """
    def __init__(self):
        self.filenames = []

    def ReadFileList(self,FN):
        """
        Append every non-blank line of the file FN to self.filenames.

        Each line is stripped of leading and trailing whitespace; lines
        that are empty after stripping are skipped.
        """
        # 'with' guarantees the file is closed even if reading fails.
        with open(FN,'r') as lfile:
            for line in lfile:
                name = line.strip()
                if name:
                    self.filenames.append(name)


#------------------------- Header ------------------
class Header :
    """
    Maps column headings to column numbers.

    FieldName is a dictionary of key,value pairs.
    The keys are the column headings found.
    For consistency, we force column headings into uppercase.
    The values are zero-based column numbers.
    eg. if the header was ID,MET,gender then the dictionary
    would be {'ID': 0, 'MET': 1, 'GENDER': 2}
    """
    def __init__(self):
        self.FieldName = {}

    def GetHeader(self,firstline,SEP) :
        """
        Given a raw input line, parse out the column headings using SEP
        as separator and add them to self.FieldName.

        Headings are stripped of surrounding whitespace and uppercased.
        Empty headings are skipped but still occupy a column, so the
        stored number is the heading's real position in the line.
        A heading that is already present keeps its first column number;
        this matters when the same Header is fed lines from several files.
        """
        for colnum, field in enumerate(firstline.split(SEP)):
            capname = field.strip().upper()
            # Store under the uppercased name, the same form that is
            # used for the duplicate check.
            if capname and capname not in self.FieldName:
                self.FieldName[capname] = colnum
        print('Headers read: ' + str(self.FieldName))


#------------------------- Student ------------------
class Student :
    """
    Data for a given student.

    Attributes:
        ID:     the student's ID; empty string until one is assigned.
        MET:    "NA" until a value is assigned.
        gender: "9" until a value is assigned.
        score:  -1 until a value is assigned.
    """
    def __init__(self, ID=""):
        self.ID = ID
        self.MET = "NA"
        self.gender = "9"
        self.score = -1

    def WriteStudent(self, outfile, SEP=","):
        """
        Write data for a student to output file.

        One line is written: ID, MET, gender and score, in that order,
        joined by SEP and terminated by a newline.

        outfile is any object with a write() method taking a string.
        """
        fields = (self.ID, self.MET, self.gender, self.score)
        # score is numeric by default, so convert every field to text
        # before joining.
        outfile.write(SEP.join(str(f) for f in fields) + "\n")


#------------------------- Table of Students ------------------
class STable :
    """
    Table of students, keyed by student ID.

    Attributes:
        students: dictionary mapping an ID string to a Student.
    """

    def __init__(self):
        self.students = {}

    @staticmethod
    def GetID(Field):
        """
        Given a field, return an ID
        In this case, all we do is return the field stripped of leading and
        trailing whitespace
        """
        return Field.strip()

    @staticmethod
    def GetMET(Field):
        """
        Given a field, return a MET value.
        The field is stripped of leading and trailing whitespace and
        every hyphen is replaced by a space.
        """
        # NOTE(review): the earlier code passed the list [" ","-"] to
        # str.replace, which raises TypeError; mapping both " " and "-"
        # to " " amounts to replacing hyphens only - confirm this is the
        # intended clean-up.
        return Field.strip().replace("-", " ")

    @staticmethod
    def GetGender(Field):
        """
        Given a field, return a gender.

        The field is stripped and uppercased, then classified by its
        first character:
            '1'        -> 'F'
            '2'        -> 'M'
            'F' or 'M' -> the uppercased field itself
            otherwise  -> '?'  (including an empty field)
        """
        Gender = Field.strip().upper()
        if not Gender :
            # Nothing to classify; avoid indexing an empty string.
            return '?'
        first = Gender[0]
        if first == '1' :
            return 'F'
        if first == '2' :
            return 'M'
        if first not in ('F','M') :
            return '?'
        # NOTE(review): textual values are returned in full
        # (eg. 'FEMALE'), not reduced to a single letter.
        return Gender

    def ReadStudentFile(self,sfile,LegalHeadings,SEP) :
        """
        Read one student file and merge its data into self.students.

        sfile is an open file whose first line is the header; the
        remaining lines hold one student each, with fields separated by
        SEP. The ID is taken from the first column. Columns headed MET
        and GENDER (in any letter case) are cleaned and stored on the
        student; all other columns are ignored. Files without an ID
        heading, and empty files, are skipped.

        LegalHeadings is accepted for interface compatibility and is
        currently unused.
        """
        lines = sfile.readlines()
        if not lines :
            return

        # Header is the first line, a special case
        HeadingsFound = Header()
        HeadingsFound.GetHeader(lines[0],SEP)

        # Invert to column number -> heading, uppercasing here so that the
        # lookups below do not depend on the case of the stored keys.
        columns = {}
        for name, col in HeadingsFound.FieldName.items() :
            columns[col] = name.upper()
        if 'ID' not in columns.values() :
            return

        # process all remaining lines
        for line in lines[1:] :
            tokens = line.split(SEP)
            print(tokens)
            IDnum = self.GetID(tokens[0])
            if not IDnum :
                # blank line or missing ID: nothing to key the data on
                continue

            student = self.students.get(IDnum)
            if student is None :
                student = Student()
                # keep the ID on the record as well as in the table key
                student.ID = IDnum
                self.students[IDnum] = student

            for col in range(1, len(tokens)) :
                value = tokens[col]
                if not value.strip() :
                    # an empty cell must not overwrite an existing value
                    continue
                # Decide what the cell holds from its column heading,
                # not from the cell's own content.
                heading = columns.get(col)
                if heading == 'MET' :
                    student.MET = self.GetMET(value)
                elif heading == 'GENDER' :
                    student.gender = self.GetGender(value)


#--------------------------- main procedure ---------------

# read command line parameters
P = Parameters()
P.read_args()

print("listfile= " + P.listfile)
print("outfile= " + P.outfile)

# read a list of files to process
infiles = FileList()
infiles.ReadFileList(P.listfile)

# Make a first pass through the files. In this pass, we read
# the first line of each file to get a complete list of which
# headers are present.
ColumnHeadings = Header()
for FN in infiles.filenames :
    print(FN)
    # 'with' closes the file even if reading the header fails
    with open(FN,'r') as F :
        firstline = F.readline()
    ColumnHeadings.GetHeader(firstline,P.SEP)

print(ColumnHeadings.FieldName)

# Make a second pass through the files, now reading the data
# indicated by the column headings.
Students = STable()
for FN in infiles.filenames :
    print('Processing ' + FN)
    with open(FN,'r') as F :
        Students.ReadStudentFile(F,ColumnHeadings,P.SEP)

# Write the merged files to the output file
# NOTE: output is not enabled yet. Iterating Students.students directly
# yields the ID keys, so the loop below would need .values() to reach the
# Student objects.
#outfile = open(P.outfile,'w')
#for S in Students.students :
#    S.WriteStudent(outfile)