#!/usr/bin/env python3

"""
 blfastaout.py - Translate FASTA table output into other formats, and open those
     files using the appropriate application, or write output to a file.

 Synopsis:
  blfastaout.py infile  [--delete] --destination dest [--outfile filename]

       infile - tsv file produced by fasta3 -m "F8C" option

       --delete - Delete infile when finished. This is mainly intended for running
            from BioLegato, where infile is a temporary file.

       --destination dest - dest is one of the following:
           For FASTA viewable Report:
                 textedit - open output files in text editor
                   specified by the $BL_TextEditor environment variable
                 browser - open in web browser specified by $BL_browser
                 textfile - write the report to the text file named
                    by --outfile.
                 htmlfile - write to HTML file
            For FASTA tsv Report:
                 blnfetch - Open in blnfetch, BioLegato interface for retrieving DNA/RNA
                     entries using ACCESSION numbers
                 blpfetch - Open in blpfetch, BioLegato interface for retrieving protein
                     entries using ACCESSION numbers
                 tsvfile - write to a tsvfile.

       --outfile - filename for saving an output file.

@modified: November 9, 2024
@author: Brian Fristensky
@contact: brian.fristensky@umanitoba.ca
"""

import argparse
import os
import shutil
import subprocess
import sys

DEBUG = True

class Parameters:
    """
    Holder for the values given on the command line.

    Attributes:
        IFN          input filename (positional argument)
        DELETEFILE   True when --delete was given
        DESTINATION  value of --destination (defaults to "blnfetch")
        OFN          value of --outfile (defaults to "")
        PID          process id of this run, as a string
    """

    def __init__(self):
        """
        Give every attribute a placeholder value, record the process id,
        then let read_args() overwrite the placeholders from the command line.
        """
        self.PID = str(os.getpid())
        self.IFN = ""
        self.OFN = ""
        self.DESTINATION = ""
        self.DELETEFILE = False
        self.read_args()

    def read_args(self):
        """
        Parse the command line with argparse and copy the results into this
        object. When the module-level DEBUG flag is true, the parsed values
        are echoed to standard output.
        """
        cli = argparse.ArgumentParser()
        cli.add_argument("infile", type=str, help="Input filename")
        cli.add_argument("--delete", action="store_true", default=False,
                         help="Delete input file when done")
        cli.add_argument("--destination", type=str, default="blnfetch",
                         help="Name of program to launch or type of file to save")
        cli.add_argument("--outfile", type=str, default="",
                         help="Filename used when action is to save a file")
        parsed = cli.parse_args()

        self.IFN, self.DELETEFILE = parsed.infile, parsed.delete
        self.DESTINATION, self.OFN = parsed.destination, parsed.outfile

        if not DEBUG:
            return

        # Diagnostic dump of what was read from the command line
        report = (
            '------------ Parameters from command line ------\n',
            f'    IFN: {self.IFN}',
            f'    DELETEFILE: {self.DELETEFILE}',
            f'    DESTINATION: {self.DESTINATION}',
            f'    OFN: {self.OFN}',
        )
        for entry in report:
            print(entry)

# -------------------------------------------------------------------------------
class FastaTable:
    """
    Methods and data for FASTA output in table format.

    Attributes:
        header          comment lines from the leading comment block of the
                        input, except the "# Fields:" line
        ColumnHeadings  column names derived from the "# Fields:" line
        Data            one list of strings per hit; element 0 is the
                        subject id / accession
        Database        name derived from the "# Database:" line, or
                        "userfile" when that line has no '@' (or is absent)
    """

    def __init__(self):
        """
        Start with an empty table: no header lines, no column headings,
        no data rows and no database name.
        """
        self.header = []
        self.ColumnHeadings = []
        self.Data = []
        self.Database = ""

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @staticmethod
    def _fix_column_headings(line):
        """
        Build the list of column headings from the "# Fields:" line.

        The column headings generated by FASTA include some fields not found
        in the output, so the first four comma-separated names are replaced
        by the single heading "subject id". The remaining names are kept
        exactly as they appear (including any leading blank).
        """
        return ["subject id"] + line.split(",")[4:]

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @staticmethod
    def _accession(subject):
        """
        Extract the accession from a subject id field.

        Ids of the form gi|1464306813|ref|NC_038294.1| yield the fourth
        '|'-separated part. Ids without '|' are returned unchanged.
        Shorter ids (fewer than four parts), which previously raised
        IndexError, yield their second part when it is non-empty.
        NOTE(review): that fallback assumes a db|accession|name layout
        (e.g. sp|P12345|NAME) - confirm against the databases in use.
        """
        if '|' not in subject:
            return subject
        parts = subject.split("|")
        if len(parts) >= 4:
            return parts[3]
        if parts[1]:
            return parts[1]
        return subject

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def ReadOutput(self,IFN) :
        """
        Read tabular FASTA output from file IFN into this object.

        The leading block of comment lines is stored in self.header, except
        for the "# Fields:" line, which is parsed into self.ColumnHeadings.
        The "# Database:" line determines self.Database. Every later
        non-comment line becomes one row of self.Data, with the query name
        (field 0) omitted and the subject id reduced to its accession unless
        the database is a user file.

        Reading stops at the first line starting with "# FASTA processed".
        Blank lines and lines with fewer than two tab-separated fields are
        ignored.
        """

        # To make processing easier, get rid of lines after the list of
        # hits. The first line after the hits starts with STOPFLAG
        STOPFLAG = "# FASTA processed"
        lines = []
        with open(IFN,"r") as infile :
            for l in infile :
                if l.startswith(STOPFLAG) :
                    break
                lines.append(l.strip())

        # First, process the leading comment lines. RawDBLine starts out
        # empty so that input without a "# Database:" line is treated as a
        # user-defined database instead of raising UnboundLocalError.
        RawDBLine = ""
        i = 0
        while i < len(lines) and lines[i].startswith("#") :
            if lines[i].startswith("# Fields:") :
                # Hold on to the comment line with the column headings.
                self.ColumnHeadings = self._fix_column_headings(lines[i])
                print(self.ColumnHeadings)
            else :
                if lines[i].startswith("# Database:") :
                    RawDBLine = lines[i]
                self.header.append(lines[i])
            i += 1

        # Next, extract the name of the database from the Database: line eg.
        # # Database: @/home/psgendb/BIRCHDEV/dat/fasta/refseqgene.nam
        # The name is used when retrieving title and species info for each
        # hit, which FASTA doesn't put into the table report.
        # A user-defined database will not contain the '@' character.
        if '@' in RawDBLine :
            # Only look at the text after '@', so that a name given without
            # any directory part is still extracted correctly.
            dbpath = RawDBLine.partition("@")[2]
            self.Database = dbpath.rpartition("/")[2].partition(".")[0]
            if self.Database == 'mouse_genome' : # SPECIAL CASE
                self.Database = 'GCF_000001635.26_top_level'
            elif self.Database == 'human_genome' : # SPECIAL CASE
                self.Database = 'GCF_000001405.38_top_level'
        else :
            self.Database = "userfile"

        # Now read the rest of the file
        for line in lines[i:] :
            if not line or line.startswith("#") :
                continue
            fields = line.split("\t")
            if len(fields) < 2 :
                # Not a hit line; there is no subject id to extract
                continue
            # Omit field 0, which is the query name. The first output field
            # is fields[1], reduced to the accession number for databases
            # whose subject ids carry extra decoration.
            subject = fields[1]
            if self.Database != "userfile" :
                subject = self._accession(subject)
            # Now, add on the rest of the fields
            self.Data.append([subject] + [f.strip() for f in fields[2:]])

        return

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def AddInfo(self,D):
        """
        Add fields from dictionary D to Data.

        D maps the first field of a row (the accession) to a list of extra
        fields. For every row whose first field is a key of D, those extra
        fields are inserted, in order, directly after the first field.
        Rows without an entry in D are left unchanged.
        """
        for row in self.Data :
            if row[0] in D :
                row[1:1] = D[row[0]]

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def WriteTable(self,OFN):
        """
        Write the table to file OFN as tab-separated text.

        Output order: the stored header lines, one column heading line
        beginning with '#', then one line per data row. Unless the database
        is "userfile", the headings "title", "species" and "taxid" are
        placed after the first column heading. The heading line is omitted
        when no column headings were read.
        """
        TAB = "\t"
        NL = "\n"

        with open(OFN,"w") as outfile :
            # Write header lines
            for l in self.header :
                outfile.write(l + NL)

            # Write column headings
            if self.ColumnHeadings :
                headings = [self.ColumnHeadings[0]]
                if self.Database != "userfile" :
                    headings += ["title", "species", "taxid"]
                headings += self.ColumnHeadings[1:]
                outfile.write("#" + TAB.join(headings) + NL)

            # Write datalines
            for datalist in self.Data :
                outfile.write(TAB.join(datalist) + NL)
        return


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBlastdbcmd(P,FT):
    """
    Run blastdbcmd to retrieve title and species information for FASTA hits.

    P  - parameters; only P.PID is used, to name the temporary files
         <PID>.acc and <PID>.tmplist in the current directory
    FT - fasta table data; FT.Data supplies the accessions (first field of
         each row) and FT.Database the database name passed to blastdbcmd

    Returns a dictionary keyed by accession number. Each value is the list
    of fields that followed the accession on the blastdbcmd output line,
    which is requested in the format accession|title|species|taxid.

    Errors from starting blastdbcmd or from reading its output file are
    propagated to the caller; the temporary files are removed in every case.
    """

    accfilename = P.PID + ".acc"
    OFN = P.PID + ".tmplist"
    infodict = {}

    try:
        # First, create a temporary file listing accession numbers as input
        # for blastdbcmd. Each accession is listed once, in order of first
        # appearance; repeats would only produce repeated output lines.
        with open(accfilename,'w') as accfile:
            for accession in dict.fromkeys(row[0] for row in FT.Data):
                accfile.write(accession + "\n")

        # Run blastdbcmd to save the desired fields into a temporary output
        # file. The exit status is deliberately not checked: accessions
        # missing from the result are simply absent from the dictionary.
        subprocess.run(["blastdbcmd", "-db", FT.Database,
                        "-entry_batch", accfilename,
                        "-outfmt", "%a|%t|%S|%T", "-out", OFN])

        # Import the blastdbcmd output into a dictionary keyed by accession
        # number. Strip off the whitespace (ie. \n) as each line is read.
        with open(OFN,'r') as infofile:
            for l in infofile:
                l = l.strip()
                if not l:
                    continue
                accession, sep, rest = l.partition("|")
                # Split the last two fields off from the right, so that a
                # title which itself contains '|' stays in one piece.
                infodict[accession] = rest.rsplit("|", 2) if sep else []
    finally:
        # remove temp files, whether or not the steps above succeeded
        for tmpname in (accfilename, OFN):
            try:
                os.remove(tmpname)
            except FileNotFoundError:
                pass

    return infodict


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNTEXTEDIT(OFN):
    """
    Open file OFN in a text editor, in the background, and remove the file
    once the editor command has finished.

    The editor command is whatever choose_edit_wrapper.sh prints; the file
    is removed with the command held in the $RM_CMD environment variable.
    This function returns as soon as the background shell has been started.

    OFN - name of the file to open (and afterwards remove)
    """
    # Local import: shlex is only needed for building shell command lines.
    import shlex

    # Quote the filename so that blanks or shell metacharacters in it cannot
    # split the argument or be interpreted by the shell.
    target = shlex.quote(OFN)

    # It's surprising how many issues there are with launching multiple
    # files in a text editor. choose_edit_wrapper.sh takes care of
    # these issues.
    COMMAND = '(nohup `choose_edit_wrapper.sh` ' + target + '; $RM_CMD ' + target + ' > /dev/null)&'
    os.system(COMMAND)
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNBROWSER(OFN,DELETEFILE):
    """
    Hand file OFN to chooseviewer.py and wait for that program to finish.

    OFN        - name of the file to view
    DELETEFILE - when true, chooseviewer.py is additionally given the
                 --delete option
    """
    viewer_cmd = ['chooseviewer.py', OFN]
    if DELETEFILE :
        viewer_cmd.append('--delete')

    # Block until the viewer launcher has exited
    subprocess.Popen(viewer_cmd).wait()
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBioLegato(DESTINATION, FN):
    """
    Run DESTINATION (blnfetch or blpfetch) on file FN in the background and
    remove the file once that program has finished.

    This function returns as soon as the background shell has been started.

    DESTINATION - name of the program to run; inserted into the shell
                  command as given
    FN          - name of the file to open (and afterwards remove)
    """
    # Local import: shlex is only needed for building shell command lines.
    import shlex

    # Quote the filename so that blanks or shell metacharacters in it cannot
    # split the argument or be interpreted by the shell.
    target = shlex.quote(FN)

    COMMAND = '(nohup ' + DESTINATION + ' ' + target + '; rm -f ' + target + ' > /dev/null)&'
    os.system(COMMAND)

    return

           
#======================== MAIN PROCEDURE ==========================
def main():
    """
    Read the command line, then send the input file to the destination
    chosen with --destination:

        textedit            open the input file in a text editor
        browser             open the input file via chooseviewer.py
        textfile, htmlfile  copy the input file to --outfile
        blnfetch, blpfetch  parse the table, add title/species information
                            where a database is named, and open the result
                            in that BioLegato program
        tsvfile             as above, but save the result to --outfile

    Exits with an error message, leaving the input file untouched, when the
    destination is not one of the above or when a destination that saves a
    file is chosen without --outfile.
    """

    P = Parameters ()

    # Destinations that save a file cannot work without a filename; say so
    # before doing any work.
    if P.DESTINATION in ('textfile','htmlfile','tsvfile') and not P.OFN :
        sys.exit("blfastaout.py: --outfile is required when --destination is "
                 + P.DESTINATION)

    # Write the output to a file, or send it to a window, as specified
    # in --destination

    if P.DESTINATION == 'textedit':
        RUNTEXTEDIT(P.IFN)

    elif P.DESTINATION == 'browser':
        RUNBROWSER(P.IFN,True)

    elif P.DESTINATION in ('textfile','htmlfile') :
        shutil.copy2(P.IFN,P.OFN)

    elif P.DESTINATION in ('blnfetch','blpfetch','tsvfile') :
        FT = FastaTable()
        FT.ReadOutput(P.IFN)
        PARSEDOFN = P.PID + '.' + 'tsv'
        if not FT.Database == "userfile" :
            infodict = RunBlastdbcmd(P,FT)
            FT.AddInfo(infodict)
        FT.WriteTable(PARSEDOFN)
        if P.DESTINATION in ('blnfetch','blpfetch') :
            RunBioLegato(P.DESTINATION,PARSEDOFN)
        else :
            # shutil.move, unlike os.rename, also works when the output
            # file is on a different filesystem than the temporary file.
            shutil.move(PARSEDOFN,P.OFN)

    else :
        # Nothing was done, so the input file must not be deleted either.
        sys.exit("blfastaout.py: unknown destination '" + P.DESTINATION + "'")

    # If --delete, delete the input file when done.
    # Not for textedit: the editor was started in the background on the
    # input file itself, and RUNTEXTEDIT removes that file once the editor
    # has finished; deleting it here would race with the editor start-up.
    if P.DELETEFILE and P.DESTINATION != 'textedit' :
        try :
            os.remove(P.IFN)
        except FileNotFoundError :
            # Already gone, e.g. removed by the viewer launcher, which is
            # given --delete (presumably it deletes the file - confirm).
            pass

# Run the main procedure unless "-test" appears among the command line
# arguments, in which case nothing is executed.
if "-test" not in sys.argv:
    main()