/*  File: peptide.c
 *  Author: Richard Durbin (rd@sanger.ac.uk)
 *  Copyright (C) J Thierry-Mieg and R Durbin, 1994
 * -------------------------------------------------------------------
 * Acedb is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 * or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
 * -------------------------------------------------------------------
 * This file is part of the ACEDB genome database package, written by
 * 	Richard Durbin (MRC LMB, UK) rd@mrc-lmb.cam.ac.uk, and
 *	Jean Thierry-Mieg (CRBM du CNRS, France) mieg@kaa.cnrs-mop.fr
 *
 * Description:
 * Exported functions:
 * HISTORY:
 * Last edited: Nov 30 17:57 2001 (edgrif)
 * Created: Tue May 10 23:59:13 1994 (rd)
 * CVS info:   $Id: peptide.c,v 1.56 2001/12/03 11:09:43 edgrif Exp $
 *-------------------------------------------------------------------
 */

#include <wh/acedb.h>
#include <wh/aceio.h>
#include <wh/a.h>
#include <wh/bs.h>
#include <wh/peptide.h>
#include <wh/lex.h>
#include <whooks/sysclass.h>
#include <whooks/classes.h>
#include <whooks/systags.h>
#include <whooks/tags.h>
#include <wh/dna.h>
#include <wh/java.h>
#include <wh/chrono.h>


/********************************************************/

static BOOL pepDoDumpFastAKey(ACEOUT dump_out, KEY key, BOOL cds_only) ;
static int pepDoDumpFastAKeySet(ACEOUT dump_out, KEYSET kSet, BOOL cds_only) ;


/********************************************************/

char pepDecodeChar[] =
{ 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P',
  'Q', 'R', 'S', 'T', 'V', 'W', 'Y', 'X', '.', '*' } ;

signed char pepEncodeChar[] =
{ -1, -1, -1, -1, -1, -1, -1, -1, -4, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, /* '*' */
  -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -1, -1, -1, -1, -1, -1, /* digits */

  -1,  0, 20,  1,  2,  3,  4,  5,  6,  7, -1,  8,  9, 10, 11, -1,
  12, 13, 14, 15, 16, -1, 17, 18, 20, 19, 20, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
} ;

char *pepName[] =
{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, "Stop",  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0, "Alanine", "Asp/Asn", "Cysteine", "Aspartate", "Glutamate",
     "Phenylalanine", "Glycine", "Histidine", "Isoleucine", 0,
     "Lysine", "Leucine", "Methionine", "Asparagine", 0,
   "Proline", "Glutamine", "Arginine", "Serine", "Threonine",
     0, "Valine", "Tryptophan", "Unkown", "Tyrosine", 
     "Glu/Gln",  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
} ;		

char *pepShortName[] =
{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, "***",  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0, "Ala", "AsX", "Cys", "Asp", "Glu", "Phe", "Gly", "His", 
     "Ile", 0,"Lys", "Leu", "Met", "Asn", 0,
   "Pro", "Gln", "Arg", "Ser", "Thr", 0, "Val", "Trp", "XXX", "Tyr", 
     "GlX",  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
} ;		
/* mhmp 23.09.98 */
/* Antobodies A Laboratory Manual  Ed Harlow David Lane p. 661*/
/* Cold Spring Harbor Laboratory 1988 page 661 */
/* je mets le Stop a -1 et les Asp/Asn,  Glu/Gln  et Unknown a 0 */
int  molecularWeight[] =
{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0, 89, 0, 121, 133, 147, 165, 75, 155, 131, 0,146, 131, 149, 132, 0,
   115, 146, 174, 105, 119, 0, 117, 204, 0, 181, 0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
} ;	
float NH2pK[] =
{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0, 9.87, 0, 10.78, 9.6, 9.67, 9.24, 9.6, 8.97, 9.76, 0, 8.9, 9.6, 
   9.21, 8.8, 0,
   10.6, 9.13, 9.09, 9.15, 9.12, 0, 9.72, 9.39, 0, 9.11, 0,  0,  0, 0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
} ;		
float COOHpK[] =
{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0, 2.35, 0, 1.71, 1.88, 2.19, 2.58, 2.34, 1.78, 2.32, 0,2.2, 2.36,
   2.28, 2.02, 0,
   1.99, 2.17, 2.18, 2.21, 2.15, 0, 2.29, 2.38, 0, 2.2, 0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 

   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
} ;		


/**************************************************************/

/* _VProtein is declared in classes.h/class.c */
static BOOL pepInitialise (void)
{ 
  KEY key ;
  if (!lexword2key("?Protein", &key, _VModel) && iskey(key)!=2)
    { 
      messerror("No model for Proteins, please edit wspec/models.wrm") ;
      return FALSE ;
    }
  return TRUE ;
}


/****************/
/*** next two functions were in dnacpt.c but should be here ***/

char codon (char* cp)
{
  switch(*cp)
    { 
    case T_: switch(*(cp+1))
      {
      case T_: switch(*(cp+2))
	{ 
	case T_: case C_: case Y_: return 'F';
	case A_: case G_: case R_: return 'L';
	default: return 'X' ;
	}
      case C_: return 'S' ;
      case A_: switch(*(cp+2))
	{ 
	case T_: case C_: case Y_: return 'Y';
	case A_: case G_: case R_: return '*';
	default: return 'X' ;
	}
      case G_: switch(*(cp+2))
	{ 
	case T_: case C_: case Y_: return 'C';
	case A_: return '*';
	case G_: return 'W';
	default: return 'X' ;
	}
      default: return 'X' ;
      }
    case C_: switch(*(cp+1))
      {
      case T_: return 'L' ;
      case C_: return 'P' ;
      case A_: switch(*(cp+2))
	{
	case T_: case C_: case Y_: return 'H';
	case A_: case G_: case R_: return 'Q';
	default: return 'X' ;
	}
      case G_: return 'R' ;
      default: return 'X' ;
      }
    case A_: switch(*(cp+1))
      {
      case T_: switch(*(cp+2))
	{
	case T_: case C_: case A_: 
	case Y_: case M_: case W_:
	case H_: return 'I';
	case G_: return 'M';
	default: return 'X' ;
	}
      case C_: return 'T' ;
      case A_: switch(*(cp+2))
	{
	case T_: case C_: case Y_: return 'N';
	case A_: case G_: case R_: return 'K';
	default: return 'X' ;
	}
      case G_: switch(*(cp+2))
	{
	case T_: case C_: case Y_: return 'S';
	case A_: case G_: case R_: return 'R';
	default: return 'X' ;
	}
      default: return 'X' ;
      }
    case G_: switch(*(cp+1))
      {
      case T_: return 'V' ;
      case C_: return 'A' ;
      case A_: switch(*(cp+2))
	{
	case T_: case C_: case Y_: return 'D';
	case A_: case G_: case R_: return 'E';
	default: return 'X' ;
	}
      case G_: return 'G' ;
      default: return 'X' ;
      }
  /***************** Wobble on first Letter *******************/
    case Y_:  /* T or C */ switch(*(cp+1))
      {
      case U_: switch(*(cp+2))
	{
	case A_: case G_: case R_: return 'L';
	default: return 'X' ;
	}
      default: return 'X' ;
      }
    case M_:  /* A or C */ switch(*(cp+1))
      {
      case G_: switch(*(cp+2))
	{
	case A_: case G_: case R_: return 'R';
	default: return 'X' ;
	}
      default:return 'X' ;
      }
    case R_: switch(*(cp+1))
      {
      case A_: switch(*(cp+2))
	{
	case T_: case C_: case Y_: return 'B'; /*Asp, Asn */
	default: return 'X' ;
	}
      default: return 'X' ;
      }
    case S_: switch(*(cp+1))
      {
      case A_: switch(*(cp+2))
	{
	case A_: case G_: case R_: return 'Z'; /* Glu, Gln */
	default: return 'X' ;
	}
      default: return 'X' ;
      }
    default: return 'X' ;
    }
}

/****************/

char reverseCodon (char* cp)
{
  char temp[3] ;

  temp[0] = complementBase[(int)cp[2]] ;
  temp[1] = complementBase[(int)cp[1]] ;
  temp[2] = complementBase[(int)cp[0]] ;
  return codon (temp) ;
}
/****************/

char antiCodon (char* cp)
{
  char temp[3] ;

  temp[0] = complementBase[(int)cp[0]] ;
  temp[1] = complementBase[(int)cp[-1]] ;
  temp[2] = complementBase[(int)cp[-2]] ;
  return codon (temp) ;
}
/****************************************/

void pepDecodeArray (Array pep)
{
  int i = arrayMax(pep) ;
  char *cp = arrp(pep,0,char) ;

  while (i--)
    { *cp = pepDecodeChar[(int)*cp] ;
      ++cp ;
    }
}

void pepEncodeArray (Array pep)
{
  int i = arrayMax(pep) ;
  signed char *cp = arrp(pep,0, signed char) ;

  while (i--)
    { *cp = pepEncodeChar[*cp] ;
      ++cp ;
    }
}

/*****************************/

static void peptideDoDump (ACEOUT dump_out, Array pep)
{
  register int i, k, k1, fin = arrayMax(pep) ;
  register char *cp, *cq ;
  char buffer [4100] ;

  i = fin ;
  cp = arrp(pep,0,char) ;
  cq = buffer ;

  while(i > 0)
    { cq = buffer ;
      memset(buffer,0,4100) ;
      for (k=0 ; k < 4000/50  ; k++)
        if (i > 0)
          { 
            k1 = 50 ;
            while (k1--  && i--)
              *cq++ = pepDecodeChar[*cp++ & 0xff] ;
            *cq++ = '\n' ; *cq = 0 ;
          }
      aceOutPrint (dump_out, "%s", buffer) ;
    }

  return;
} /* peptideDoDump */

/*****************************/

BOOL peptideDump (ACEOUT dump_out, KEY k) 
     /* DumpFuncType for class _VPeptide */
{
  Array pep = 0 ;

  if (!(pep = arrayGet(k, char, "c"))) 
    return FALSE ;

 
  peptideDoDump(dump_out, pep) ;

  arrayDestroy(pep);

  return TRUE ;
} /* peptideDump */

/************************************/

BOOL javaDumpPeptide (ACEOUT dump_out, KEY key) 
{ Array pep = arrayGet(key, char, "c") ;
 
  if (!pep)
    return FALSE ;

  aceOutPrint (dump_out, "?Peptide?%s?\t?peptide?",
	       freejavaprotect(name(key)));

  pepDecodeArray(pep) ;
  array(pep,arrayMax(pep),char) = 0 ; /* ensure 0 terminated */
  --arrayMax(pep) ;
  aceOutPrint (dump_out, "%s", arrp(pep,0,char)) ;
  aceOutPrint (dump_out, "?\n\n");

  arrayDestroy(pep) ;

  return TRUE;
} /* javaDumpPeptide */

/************************************/

ParseFuncReturn peptideParse(ACEIN parse_io, KEY key, char **errtext)
     /* ParseFuncType */
{
  char *cp, c = 0 ;
  signed char c1 = 0 ;
  static Array pep = 0 ;

  if (class(key) != _VPeptide)
    messcrash ("peptideParse called on a non-peptide key") ;

  pep = arrayReCreate (pep, 1000, char) ;

  while (aceInCard(parse_io) && (cp = aceInWord(parse_io))) do
    while ((c = *cp++))
      { c1 = pepEncodeChar[((int)c) & 0x7f] ;
	if (c1 >= 0)
	  array(pep,arrayMax(pep),signed char) = c1 ;
	else if (c1 == -1)
	  goto abort ;
      }
    while ((cp = aceInWord(parse_io))) ;

  /* The CDS should include the stop codon so remove it from the translation.*/
  /* N.B. we could issue a warning if it doesn't but perhaps this is         */
  /* overkill.                                                               */
  if (c == '*')			/* removing trailing '*' */
    --arrayMax(pep) ;

  if (arrayMax (pep))		/* don't delete if nothing there */
    {
      if (peptideStore (key, pep))
	return PARSEFUNC_OK ;
      else
	{
	  *errtext = messprintf(" failed to store peptide %s", name(key)) ;
	  return PARSEFUNC_ERR ;
	}
    }
  else
    return PARSEFUNC_EMPTY ;

 abort:
  *errtext = messprintf("  peptideParse error at line %d in %s : "
			"bad char %c (0x%x)", 
			aceInStreamLine(parse_io), name(key), c, c) ;
  return PARSEFUNC_ERR ;
} /* peptideParse */

/*******************************/
/*
#define MID_VAL 10

int pepchecksum(Array a)
{
  int num,tot=0;
  register i,n=1,j=0;
  int max=-99,min=99;
  
  num = (int)(arrayMax(a)/4);

  for(i=0,j=0;j<num;i+=4,j++){
    tot += (int)arr(a,i,signed char)-MID_VAL;
    tot += ((int)arr(a,i+1,signed char)-MID_VAL)*10;
    tot += ((int)arr(a,i+2,signed char)-MID_VAL)*100;
    tot += ((int)arr(a,i+3,signed char)-MID_VAL)*1000;
  }
  
  do remainder 
  for(i=num*4;i<arrayMax(a);i++){
    tot += ((int)arr(a,i,signed char)-MID_VAL)*n;
    n *=10;
  }
  if(tot)
    return tot;
  else
    return 1;
}
*/
/*******************************/
int hashArray(Array a)
{
#define  SIZEOFINT (8 * sizeof (int))
  int n = 32;
  unsigned int i, h = 0 , j = 0,
    rotate = 13 ,
    leftover = SIZEOFINT - rotate ;

  chrono("hashText") ;  
  for(i=0;i<arrayMax(a);i++)
    { h = freeupper(pepDecodeChar[(int)arr(a,i,signed char)]) ^  /* XOR*/
        ( ( h >> leftover ) | (h << rotate)) ; 
    }
  /* compress down to n bits */
  for(j = h, i = n ; i< SIZEOFINT; i+=n)
    j ^= (h>>i) ;
  chronoReturn() ;

  return j &  0xffffffff ;
}
/*******************************/

BOOL peptideStore (KEY key, Array pep)
{ 
  OBJ obj ;
  KEY seq ;
  int len, len1 ;
  int checksum, ck1;

  if (!pepInitialise())
    return FALSE ;

  if (class(key) == _VProtein)
    lexaddkey (name(key), &key, _VPeptide) ;
  else if (class(key) != _VPeptide)
    messcrash ("peptideStore called on a non-peptide key") ;

  lexaddkey (name(key), &seq, _VProtein) ;

  if (!pep || !(len = arrayMax(pep)))
    { 
      arrayKill (key) ;
      if ((obj = bsCreate (seq)))
	{
	  if (bsFindKey (obj, _Peptide, key))
	    { 
	      bsDestroy (obj) ;
	      if ((obj = bsUpdate(seq)))
		{
		  if (obj && bsFindKey (obj, _Peptide, key))
		    bsRemove (obj) ;
		  bsSave (obj) ;
		}
	    }
	  else
	    bsDestroy (obj) ;
	}
    }
  else
    { 
      arrayStore (key, pep, "c") ;
      checksum = hashArray(pep);
      if ((obj = bsCreate (seq)))
	{  /* check first for sake of client server speed */
	  if (bsFindKey (obj, _Peptide, key)  &&
	      bsGetData (obj, _bsRight, _Int, &len1) &&
	      len1 == len &&
	      ( bsType(obj, _bsRight) != _Int ||
		( bsGetData (obj,_bsRight, _Int, &ck1) && ck1 == checksum)
		))
	    { bsDestroy (obj) ; return TRUE ; } 
	  bsDestroy (obj) ;
	}
      if ((obj = bsUpdate (seq)))
	{
	  bsAddKey (obj, _Peptide, key) ;
	  bsAddData (obj, _bsRight, _Int, &len) ;
	  if(bsType(obj, _bsRight) == _Int)
	    bsAddData (obj,_bsRight, _Int, &checksum);
	  bsSave(obj) ;
	}
    }
  return TRUE ;
}

/**************************************/

/* This is the old peptideGet() which only translates the object if it is a  */
/* CDS. Its just a cover function for the newer peptideTranslate() which     */
/* will translate either just the CDS or the whole object.                   */
/*                                                                           */
Array peptideGet(KEY key)
{
  Array result = NULL ;

  result = peptideTranslate(key, TRUE) ; 

  return result ;
}

/* Translates the dna for a particular object to its peptide representation. */
/*                                                                           */
/* If CDS_only is TRUE, then the object will be checked for a CDS tag, and   */
/* and start/end positions for the CDS will be used to define the start and  */
/* end of peptide translation within the exons.                              */
/* If CDS_only is FALSE, then all of the exons for the object will be        */
/* translated.                                                               */
/* In addition, if the Start_not_found tag is found, this will be used to    */
/* frame shift the start of translation within the object.                   */
/*             WE MAY NEED TO ALLOW AN EXTRA ARGUMENT HERE TO ADJUST THE     */
/*             THE START OF TRANSLATION VIA FMAP INTERACTIVELY....           */
/*                                                                           */
/* There is much overlap in checking of CDS & Start_not_found here with that */
/* in fmapconvert() but its difficult to produce a set of routines that are  */
/* convenient to use in both functions.                                      */
/*                                                                           */
Array peptideTranslate(KEY key, BOOL CDS_only)
{ 
  OBJ obj = NULL ;
  int dna_min = 0, dna_max = 0 ;
  Array dna = NULL, pep = NULL ;
  char c = 0 ;

  if (class(key) == _VPeptide)
    return arrayGet(key, char, "c") ;

  if (!(obj = bsCreate (key)))
    return NULL ;

  if (bsGetKey (obj, _Peptide, &key) && class(key) == _VPeptide)
    {
      bsDestroy (obj) ;
      return arrayGet(key, char, "c") ;
    }

  if (bsGetKey (obj, _Corresponding_DNA, &key))	/* try that */
    {
      bsDestroy (obj) ;
      if (!(obj = bsCreate (key)))
	return NULL ;
    }

  if (!(dna = dnaGet(key)))
    {
      bsDestroy (obj) ;
      return NULL ;
    }
  else
    {
      dna_min = 1 ;
      dna_max = arrayMax(dna) ;
    }

  /* Only translate the CDS portion of the objects dna.                      */
  if (CDS_only)
    {
      int cds_min = 0, cds_max = 0 ;

      /* If you only want the CDS translated then there'd better be one !    */
      if (!bsFindTag (obj, _CDS))
	{
	  bsDestroy (obj) ;
	  arrayDestroy (dna) ;
	  return NULL ;
	}

      /* Retrieve the start/end of the cds section of dna.                   */
      cds_min = dna_min ;
      cds_max = dna_max ;

      if (bsGetData(obj, _bsRight, _Int, &cds_min))
	{
	  bsGetData(obj, _bsRight, _Int, &cds_max) ;

	  /* We need to check the coords for the cds start/end, these can be     */
	  /* changed by hand and its easy to forget and specify them in          */
	  /* Source_exons coordinates instead of spliced DNA coordinates.        */
	  /* We warn the user if they have got it wrong.                         */
	  if (cds_min < dna_min || cds_max > dna_max)
	    {
	      messerror("The start/end positions set for the CDS in object %s"
			" are outside the spliced DNA coordinates for that object."
			" Make sure the CDS positions have been specified"
			" in spliced DNA, not Source_exon, coordinates."
			" (spliced DNA coords are %d to %d, current CDS coords are %d to %d)",
			name(key), dna_min, dna_max, cds_min, cds_max) ;

	      bsDestroy (obj) ;
	      arrayDestroy (dna) ;
	      return NULL ;
	    }
	}

      dna_min = cds_min ;
      dna_max = cds_max ;
    }

  /* May have to modify beginning of dna for translation because coding may  */
  /* have begun in a previous exon so we don't have the beginning, the       */
  /* Start_not_found allows us to correct the reading frame by setting a new */
  /* start position.                                                         */
  /* NOTE that if Start_not_found and CDS are set then the CDS MUST start at */
  /* position 1 spliced exon coordinates.                                    */
  if (bsFindTag(obj, _Start_not_found))
    {
      int no_start = 1 ;				    /* default to 1 if no values specified */
      
      if (CDS_only && dna_min != 1)
	{
	  messerror("Data inconsistency: the Start_not_found tag is set for object %s,"
		    " but the CDS begins at position %d instead of at 1.",
		    name(key), dna_min) ;
	  
	  bsDestroy(obj) ;
	  arrayDestroy(dna) ;
	  return NULL ;
	}


      if (bsGetData(obj, _bsRight, _Int, &no_start))
	{
	  if (no_start < 1 || no_start > 3)
	    {
	      messerror("Data inconsistency: the Start_not_found tag for the object %s"
			" should be given the value 1, 2 or 3 but has been"
			" set to %d.",
			name(key), no_start) ;
	      
	      bsDestroy (obj) ;
	      arrayDestroy (dna) ;
	      return NULL ;
	    }
	}
      
      dna_min += (no_start - 1) ;
    }
  

  /* Finally, do the translation, only do complete codons.                   */
  /*                                                                         */
  --dna_min ;						    /* dna_min is >= 1 */
  pep = arrayCreate(((dna_max - dna_min) / 3), char) ;
  while (dna_min < dna_max)
    {
      array(pep, arrayMax(pep), signed char) = 
	pepEncodeChar[(int)(c = codon(arrp(dna, dna_min, char)))] ;
      dna_min += 3 ;
    }

  /* The CDS should include the stop codon so remove it from the translation.*/
  /* N.B. we could issue a warning if it doesn't but perhaps this is         */
  /* overkill.                                                               */
  if (c == '*')
    --arrayMax(pep) ;

  arrayDestroy (dna) ;
  bsDestroy (obj) ;

  return pep ;
} /* peptideGet */


/*************************************************************/
/*************************************************************/

ACEOUT pepFileOpen (STORE_HANDLE handle)
{
  static char directory[DIR_BUFFER_SIZE]= "";
  static char filename[FIL_BUFFER_SIZE] = "";
  ACEOUT pep_out = NULL;

  pep_out = 
    aceOutCreateToChooser ("Choose a file for export in fasta format",
			   directory, filename, "pep", "w", handle);

  return pep_out;
} /* pepFileOpen */

/**********************************************************/

/* called also from dnacptfastaDump */
static void pepDoDump (ACEOUT dump_out, Array a, int from, int to)
{
  char buf [55] ;
  register int i, j, k ;
  
   for (i = from ; i < to ;)
    { 
      k = 0 ;
      for (j = 50 ; i < to && j-- ;)
	{
	  buf [k++] = pepDecodeChar[((int)arr(a, i++, char)) & 0xff] ;
	}
      buf [k++] = '\n' ;
      buf[k++] = 0 ;
      aceOutPrint (dump_out, "%s", buf) ;
    }

   return;
} /* pepDoDump */

/**********************************************************/

BOOL pepDumpFastA (Array a, int from, int to, char *text, ACEOUT dump_out)
{ 
  if (!a)
    return FALSE ;

  if (from < 0)
    from = 0 ;

  ++to ;
  if (to > arrayMax(a))
    to = arrayMax(a) ;

  aceOutPrint (dump_out, ">%s\n", text) ;
  pepDoDump(dump_out, a, from, to) ;

  return TRUE ;
} /* pepDumpFastA */

/**********************************************************/

BOOL pepDumpFastAKey (ACEOUT dump_out, KEY key)
{ 
  BOOL result = FALSE ;

  result = pepDoDumpFastAKey(dump_out, key, FALSE) ;

  return result ;
}

static BOOL pepDoDumpFastAKey(ACEOUT dump_out, KEY key, BOOL cds_only)
{ 
  BOOL result = FALSE ;
  Array a = NULL ;

  a = peptideTranslate(key, cds_only) ;
  if (a)
    {
      result = pepDumpFastA(a, 0, arrayMax(a)-1, name(key), dump_out);
      arrayDestroy(a) ;
    }

  return result ;
} /* pepDumpFastAKey */


/**********************************************************/
/* Dump the peptides for a keyset, either the full peptides for the RNA's    */
/* or just the CDS portions.                                                 */

int pepDumpFastAKeySet(ACEOUT dump_out, KEYSET kSet)
{
  int result = 0 ;

  result = pepDoDumpFastAKeySet(dump_out, kSet, FALSE) ;

  return result ;
}

int pepDumpCDSFastAKeySet(ACEOUT dump_out, KEYSET kSet)
{
  int result = 0 ;

  result = pepDoDumpFastAKeySet(dump_out, kSet, TRUE) ;

  return result ;
}

static int pepDoDumpFastAKeySet(ACEOUT dump_out, KEYSET kSet, BOOL cds_only)
{
  KEYSET alpha ;
  int i, n = 0 ;

  if (!keySetExists(kSet) || keySetMax(kSet) == 0)
    return 0 ;

  alpha = keySetAlphaHeap (kSet, keySetMax(kSet)) ;

  for (i = 0 ; i < keySetMax(alpha) ; ++i)
    {
      if (pepDoDumpFastAKey(dump_out, keySet(alpha, i), cds_only))
	++n ;
    }

  keySetDestroy (alpha) ;

  messout ("I wrote %d sequences", n) ;

  return n ;
} /* pepDumpFastAKeySet */

/************************** eof *****************************/