/* File: peptide.c * Author: Richard Durbin (rd@sanger.ac.uk) * Copyright (C) J Thierry-Mieg and R Durbin, 1994 * ------------------------------------------------------------------- * Acedb is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * or see the on-line version at http://www.gnu.org/copyleft/gpl.txt * ------------------------------------------------------------------- * This file is part of the ACEDB genome database package, written by * Richard Durbin (MRC LMB, UK) rd@mrc-lmb.cam.ac.uk, and * Jean Thierry-Mieg (CRBM du CNRS, France) mieg@kaa.cnrs-mop.fr * * Description: * Exported functions: * HISTORY: * Last edited: Nov 30 17:57 2001 (edgrif) * Created: Tue May 10 23:59:13 1994 (rd) * CVS info: $Id: peptide.c,v 1.56 2001/12/03 11:09:43 edgrif Exp $ *------------------------------------------------------------------- */ #include #include #include #include #include #include #include #include #include #include #include #include #include /********************************************************/ static BOOL pepDoDumpFastAKey(ACEOUT dump_out, KEY key, BOOL cds_only) ; static int pepDoDumpFastAKeySet(ACEOUT dump_out, KEYSET kSet, BOOL cds_only) ; /********************************************************/ char pepDecodeChar[] = { 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', 'X', '.', '*' } ; signed char pepEncodeChar[] = { -1, -1, -1, -1, -1, -1, -1, -1, -4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, /* '*' */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -1, -1, -1, -1, -1, -1, /* digits */ -1, 0, 20, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11, -1, 12, 13, 14, 15, 16, -1, 17, 18, 20, 19, 20, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } ; char *pepName[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "Stop", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "Alanine", "Asp/Asn", "Cysteine", "Aspartate", "Glutamate", "Phenylalanine", "Glycine", "Histidine", "Isoleucine", 0, "Lysine", "Leucine", "Methionine", "Asparagine", 0, "Proline", "Glutamine", "Arginine", "Serine", "Threonine", 0, "Valine", "Tryptophan", "Unkown", "Tyrosine", "Glu/Gln", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ; char *pepShortName[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "***", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "Ala", "AsX", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", 0,"Lys", "Leu", "Met", "Asn", 0, "Pro", "Gln", "Arg", "Ser", "Thr", 0, "Val", "Trp", "XXX", "Tyr", "GlX", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ; /* mhmp 23.09.98 */ /* Antobodies A Laboratory Manual Ed Harlow David Lane p. 661*/ /* Cold Spring Harbor Laboratory 1988 page 661 */ /* je mets le Stop a -1 et les Asp/Asn, Glu/Gln et Unknown a 0 */ int molecularWeight[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 0, 121, 133, 147, 165, 75, 155, 131, 0,146, 131, 149, 132, 0, 115, 146, 174, 105, 119, 0, 117, 204, 0, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ; float NH2pK[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9.87, 0, 10.78, 9.6, 9.67, 9.24, 9.6, 8.97, 9.76, 0, 8.9, 9.6, 9.21, 8.8, 0, 10.6, 9.13, 9.09, 9.15, 9.12, 0, 9.72, 9.39, 0, 9.11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ; float COOHpK[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.35, 0, 1.71, 1.88, 2.19, 2.58, 2.34, 1.78, 2.32, 0,2.2, 2.36, 2.28, 2.02, 0, 1.99, 2.17, 2.18, 2.21, 2.15, 0, 2.29, 2.38, 0, 2.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ; /**************************************************************/ /* _VProtein is declared in classes.h/class.c */ static BOOL pepInitialise (void) { KEY key ; if (!lexword2key("?Protein", &key, _VModel) && iskey(key)!=2) { messerror("No model for Proteins, please edit wspec/models.wrm") ; return FALSE ; } return TRUE ; } /****************/ /*** next two functions were in dnacpt.c but should be here ***/ char codon (char* cp) { switch(*cp) { case T_: switch(*(cp+1)) { case T_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'F'; case A_: case G_: case R_: return 'L'; default: return 'X' ; } case C_: return 'S' ; case A_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'Y'; case A_: case G_: case R_: return '*'; default: return 'X' ; } case G_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'C'; case A_: return '*'; case G_: return 'W'; default: return 'X' ; } default: return 'X' ; } case C_: switch(*(cp+1)) { case T_: return 'L' ; case C_: return 'P' ; case A_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'H'; case A_: case G_: case R_: return 'Q'; default: return 'X' ; } case G_: return 'R' ; default: return 'X' ; } case A_: switch(*(cp+1)) { case T_: switch(*(cp+2)) { case T_: case C_: case A_: case Y_: case M_: case W_: case H_: return 'I'; case G_: return 'M'; default: return 'X' ; } case C_: return 'T' ; case A_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'N'; case A_: case G_: case R_: return 'K'; default: return 'X' ; } case G_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'S'; case A_: case G_: case R_: return 'R'; default: return 'X' ; } default: return 'X' ; } case G_: switch(*(cp+1)) { case T_: return 'V' ; case C_: return 'A' ; case A_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'D'; case A_: case G_: case R_: return 'E'; default: return 'X' ; } case G_: return 'G' ; default: return 'X' ; } /***************** Wobble on first Letter *******************/ case Y_: /* T or C */ switch(*(cp+1)) { case U_: switch(*(cp+2)) { case A_: case G_: case R_: return 'L'; default: return 'X' ; } default: return 'X' ; } case M_: /* A or C */ switch(*(cp+1)) { case G_: switch(*(cp+2)) { case A_: case G_: case R_: return 'R'; default: return 'X' ; } default:return 'X' ; } case R_: switch(*(cp+1)) { case A_: switch(*(cp+2)) { case T_: case C_: case Y_: return 'B'; /*Asp, Asn */ default: return 'X' ; } default: return 'X' ; } case S_: switch(*(cp+1)) { case A_: switch(*(cp+2)) { case A_: case G_: case R_: return 'Z'; /* Glu, Gln */ default: return 'X' ; } default: return 'X' ; } default: return 'X' ; } } /****************/ char reverseCodon (char* cp) { char temp[3] ; temp[0] = complementBase[(int)cp[2]] ; temp[1] = complementBase[(int)cp[1]] ; temp[2] = complementBase[(int)cp[0]] ; return codon (temp) ; } /****************/ char antiCodon (char* cp) { char temp[3] ; temp[0] = complementBase[(int)cp[0]] ; temp[1] = complementBase[(int)cp[-1]] ; temp[2] = complementBase[(int)cp[-2]] ; return codon (temp) ; } /****************************************/ void pepDecodeArray (Array pep) { int i = arrayMax(pep) ; char *cp = arrp(pep,0,char) ; while (i--) { *cp = pepDecodeChar[(int)*cp] ; ++cp ; } } void pepEncodeArray (Array pep) { int i = arrayMax(pep) ; signed char *cp = arrp(pep,0, signed char) ; while (i--) { *cp = pepEncodeChar[*cp] ; ++cp ; } } /*****************************/ static void peptideDoDump (ACEOUT dump_out, Array pep) { register int i, k, k1, fin = arrayMax(pep) ; register char *cp, *cq ; char buffer [4100] ; i = fin ; cp = arrp(pep,0,char) ; cq = buffer ; while(i > 0) { cq = buffer ; memset(buffer,0,4100) ; for (k=0 ; k < 4000/50 ; k++) if (i > 0) { k1 = 50 ; while (k1-- && i--) *cq++ = pepDecodeChar[*cp++ & 0xff] ; *cq++ = '\n' ; *cq = 0 ; } aceOutPrint (dump_out, "%s", buffer) ; } return; } /* peptideDoDump */ /*****************************/ BOOL peptideDump (ACEOUT dump_out, KEY k) /* DumpFuncType for class _VPeptide */ { Array pep = 0 ; if (!(pep = arrayGet(k, char, "c"))) return FALSE ; peptideDoDump(dump_out, pep) ; arrayDestroy(pep); return TRUE ; } /* peptideDump */ /************************************/ BOOL javaDumpPeptide (ACEOUT dump_out, KEY key) { Array pep = arrayGet(key, char, "c") ; if (!pep) return FALSE ; aceOutPrint (dump_out, "?Peptide?%s?\t?peptide?", freejavaprotect(name(key))); pepDecodeArray(pep) ; array(pep,arrayMax(pep),char) = 0 ; /* ensure 0 terminated */ --arrayMax(pep) ; aceOutPrint (dump_out, "%s", arrp(pep,0,char)) ; aceOutPrint (dump_out, "?\n\n"); arrayDestroy(pep) ; return TRUE; } /* javaDumpPeptide */ /************************************/ ParseFuncReturn peptideParse(ACEIN parse_io, KEY key, char **errtext) /* ParseFuncType */ { char *cp, c = 0 ; signed char c1 = 0 ; static Array pep = 0 ; if (class(key) != _VPeptide) messcrash ("peptideParse called on a non-peptide key") ; pep = arrayReCreate (pep, 1000, char) ; while (aceInCard(parse_io) && (cp = aceInWord(parse_io))) do while ((c = *cp++)) { c1 = pepEncodeChar[((int)c) & 0x7f] ; if (c1 >= 0) array(pep,arrayMax(pep),signed char) = c1 ; else if (c1 == -1) goto abort ; } while ((cp = aceInWord(parse_io))) ; /* The CDS should include the stop codon so remove it from the translation.*/ /* N.B. we could issue a warning if it doesn't but perhaps this is */ /* overkill. */ if (c == '*') /* removing trailing '*' */ --arrayMax(pep) ; if (arrayMax (pep)) /* don't delete if nothing there */ { if (peptideStore (key, pep)) return PARSEFUNC_OK ; else { *errtext = messprintf(" failed to store peptide %s", name(key)) ; return PARSEFUNC_ERR ; } } else return PARSEFUNC_EMPTY ; abort: *errtext = messprintf(" peptideParse error at line %d in %s : " "bad char %c (0x%x)", aceInStreamLine(parse_io), name(key), c, c) ; return PARSEFUNC_ERR ; } /* peptideParse */ /*******************************/ /* #define MID_VAL 10 int pepchecksum(Array a) { int num,tot=0; register i,n=1,j=0; int max=-99,min=99; num = (int)(arrayMax(a)/4); for(i=0,j=0;j> leftover ) | (h << rotate)) ; } /* compress down to n bits */ for(j = h, i = n ; i< SIZEOFINT; i+=n) j ^= (h>>i) ; chronoReturn() ; return j & 0xffffffff ; } /*******************************/ BOOL peptideStore (KEY key, Array pep) { OBJ obj ; KEY seq ; int len, len1 ; int checksum, ck1; if (!pepInitialise()) return FALSE ; if (class(key) == _VProtein) lexaddkey (name(key), &key, _VPeptide) ; else if (class(key) != _VPeptide) messcrash ("peptideStore called on a non-peptide key") ; lexaddkey (name(key), &seq, _VProtein) ; if (!pep || !(len = arrayMax(pep))) { arrayKill (key) ; if ((obj = bsCreate (seq))) { if (bsFindKey (obj, _Peptide, key)) { bsDestroy (obj) ; if ((obj = bsUpdate(seq))) { if (obj && bsFindKey (obj, _Peptide, key)) bsRemove (obj) ; bsSave (obj) ; } } else bsDestroy (obj) ; } } else { arrayStore (key, pep, "c") ; checksum = hashArray(pep); if ((obj = bsCreate (seq))) { /* check first for sake of client server speed */ if (bsFindKey (obj, _Peptide, key) && bsGetData (obj, _bsRight, _Int, &len1) && len1 == len && ( bsType(obj, _bsRight) != _Int || ( bsGetData (obj,_bsRight, _Int, &ck1) && ck1 == checksum) )) { bsDestroy (obj) ; return TRUE ; } bsDestroy (obj) ; } if ((obj = bsUpdate (seq))) { bsAddKey (obj, _Peptide, key) ; bsAddData (obj, _bsRight, _Int, &len) ; if(bsType(obj, _bsRight) == _Int) bsAddData (obj,_bsRight, _Int, &checksum); bsSave(obj) ; } } return TRUE ; } /**************************************/ /* This is the old peptideGet() which only translates the object if it is a */ /* CDS. Its just a cover function for the newer peptideTranslate() which */ /* will translate either just the CDS or the whole object. */ /* */ Array peptideGet(KEY key) { Array result = NULL ; result = peptideTranslate(key, TRUE) ; return result ; } /* Translates the dna for a particular object to its peptide representation. */ /* */ /* If CDS_only is TRUE, then the object will be checked for a CDS tag, and */ /* and start/end positions for the CDS will be used to define the start and */ /* end of peptide translation within the exons. */ /* If CDS_only is FALSE, then all of the exons for the object will be */ /* translated. */ /* In addition, if the Start_not_found tag is found, this will be used to */ /* frame shift the start of translation within the object. */ /* WE MAY NEED TO ALLOW AN EXTRA ARGUMENT HERE TO ADJUST THE */ /* THE START OF TRANSLATION VIA FMAP INTERACTIVELY.... */ /* */ /* There is much overlap in checking of CDS & Start_not_found here with that */ /* in fmapconvert() but its difficult to produce a set of routines that are */ /* convenient to use in both functions. */ /* */ Array peptideTranslate(KEY key, BOOL CDS_only) { OBJ obj = NULL ; int dna_min = 0, dna_max = 0 ; Array dna = NULL, pep = NULL ; char c = 0 ; if (class(key) == _VPeptide) return arrayGet(key, char, "c") ; if (!(obj = bsCreate (key))) return NULL ; if (bsGetKey (obj, _Peptide, &key) && class(key) == _VPeptide) { bsDestroy (obj) ; return arrayGet(key, char, "c") ; } if (bsGetKey (obj, _Corresponding_DNA, &key)) /* try that */ { bsDestroy (obj) ; if (!(obj = bsCreate (key))) return NULL ; } if (!(dna = dnaGet(key))) { bsDestroy (obj) ; return NULL ; } else { dna_min = 1 ; dna_max = arrayMax(dna) ; } /* Only translate the CDS portion of the objects dna. */ if (CDS_only) { int cds_min = 0, cds_max = 0 ; /* If you only want the CDS translated then there'd better be one ! */ if (!bsFindTag (obj, _CDS)) { bsDestroy (obj) ; arrayDestroy (dna) ; return NULL ; } /* Retrieve the start/end of the cds section of dna. */ cds_min = dna_min ; cds_max = dna_max ; if (bsGetData(obj, _bsRight, _Int, &cds_min)) { bsGetData(obj, _bsRight, _Int, &cds_max) ; /* We need to check the coords for the cds start/end, these can be */ /* changed by hand and its easy to forget and specify them in */ /* Source_exons coordinates instead of spliced DNA coordinates. */ /* We warn the user if they have got it wrong. */ if (cds_min < dna_min || cds_max > dna_max) { messerror("The start/end positions set for the CDS in object %s" " are outside the spliced DNA coordinates for that object." " Make sure the CDS positions have been specified" " in spliced DNA, not Source_exon, coordinates." " (spliced DNA coords are %d to %d, current CDS coords are %d to %d)", name(key), dna_min, dna_max, cds_min, cds_max) ; bsDestroy (obj) ; arrayDestroy (dna) ; return NULL ; } } dna_min = cds_min ; dna_max = cds_max ; } /* May have to modify beginning of dna for translation because coding may */ /* have begun in a previous exon so we don't have the beginning, the */ /* Start_not_found allows us to correct the reading frame by setting a new */ /* start position. */ /* NOTE that if Start_not_found and CDS are set then the CDS MUST start at */ /* position 1 spliced exon coordinates. */ if (bsFindTag(obj, _Start_not_found)) { int no_start = 1 ; /* default to 1 if no values specified */ if (CDS_only && dna_min != 1) { messerror("Data inconsistency: the Start_not_found tag is set for object %s," " but the CDS begins at position %d instead of at 1.", name(key), dna_min) ; bsDestroy(obj) ; arrayDestroy(dna) ; return NULL ; } if (bsGetData(obj, _bsRight, _Int, &no_start)) { if (no_start < 1 || no_start > 3) { messerror("Data inconsistency: the Start_not_found tag for the object %s" " should be given the value 1, 2 or 3 but has been" " set to %d.", name(key), no_start) ; bsDestroy (obj) ; arrayDestroy (dna) ; return NULL ; } } dna_min += (no_start - 1) ; } /* Finally, do the translation, only do complete codons. */ /* */ --dna_min ; /* dna_min is >= 1 */ pep = arrayCreate(((dna_max - dna_min) / 3), char) ; while (dna_min < dna_max) { array(pep, arrayMax(pep), signed char) = pepEncodeChar[(int)(c = codon(arrp(dna, dna_min, char)))] ; dna_min += 3 ; } /* The CDS should include the stop codon so remove it from the translation.*/ /* N.B. we could issue a warning if it doesn't but perhaps this is */ /* overkill. */ if (c == '*') --arrayMax(pep) ; arrayDestroy (dna) ; bsDestroy (obj) ; return pep ; } /* peptideGet */ /*************************************************************/ /*************************************************************/ ACEOUT pepFileOpen (STORE_HANDLE handle) { static char directory[DIR_BUFFER_SIZE]= ""; static char filename[FIL_BUFFER_SIZE] = ""; ACEOUT pep_out = NULL; pep_out = aceOutCreateToChooser ("Choose a file for export in fasta format", directory, filename, "pep", "w", handle); return pep_out; } /* pepFileOpen */ /**********************************************************/ /* called also from dnacptfastaDump */ static void pepDoDump (ACEOUT dump_out, Array a, int from, int to) { char buf [55] ; register int i, j, k ; for (i = from ; i < to ;) { k = 0 ; for (j = 50 ; i < to && j-- ;) { buf [k++] = pepDecodeChar[((int)arr(a, i++, char)) & 0xff] ; } buf [k++] = '\n' ; buf[k++] = 0 ; aceOutPrint (dump_out, "%s", buf) ; } return; } /* pepDoDump */ /**********************************************************/ BOOL pepDumpFastA (Array a, int from, int to, char *text, ACEOUT dump_out) { if (!a) return FALSE ; if (from < 0) from = 0 ; ++to ; if (to > arrayMax(a)) to = arrayMax(a) ; aceOutPrint (dump_out, ">%s\n", text) ; pepDoDump(dump_out, a, from, to) ; return TRUE ; } /* pepDumpFastA */ /**********************************************************/ BOOL pepDumpFastAKey (ACEOUT dump_out, KEY key) { BOOL result = FALSE ; result = pepDoDumpFastAKey(dump_out, key, FALSE) ; return result ; } static BOOL pepDoDumpFastAKey(ACEOUT dump_out, KEY key, BOOL cds_only) { BOOL result = FALSE ; Array a = NULL ; a = peptideTranslate(key, cds_only) ; if (a) { result = pepDumpFastA(a, 0, arrayMax(a)-1, name(key), dump_out); arrayDestroy(a) ; } return result ; } /* pepDumpFastAKey */ /**********************************************************/ /* Dump the peptides for a keyset, either the full peptides for the RNA's */ /* or just the CDS portions. */ int pepDumpFastAKeySet(ACEOUT dump_out, KEYSET kSet) { int result = 0 ; result = pepDoDumpFastAKeySet(dump_out, kSet, FALSE) ; return result ; } int pepDumpCDSFastAKeySet(ACEOUT dump_out, KEYSET kSet) { int result = 0 ; result = pepDoDumpFastAKeySet(dump_out, kSet, TRUE) ; return result ; } static int pepDoDumpFastAKeySet(ACEOUT dump_out, KEYSET kSet, BOOL cds_only) { KEYSET alpha ; int i, n = 0 ; if (!keySetExists(kSet) || keySetMax(kSet) == 0) return 0 ; alpha = keySetAlphaHeap (kSet, keySetMax(kSet)) ; for (i = 0 ; i < keySetMax(alpha) ; ++i) { if (pepDoDumpFastAKey(dump_out, keySet(alpha, i), cds_only)) ++n ; } keySetDestroy (alpha) ; messout ("I wrote %d sequences", n) ; return n ; } /* pepDumpFastAKeySet */ /************************** eof *****************************/