/*
 * Overview and review notes for this file (the code below is left untouched).
 *
 * Purpose (from the original header comment): compute the molecular weight
 * and amino-acid composition of a protein sequence.
 *
 * Tables visible in the file:
 *   aa[]   - the 23 one-letter residue codes "ACDEFGHIKLMNPQRSTVWYBZX".
 *   saa[]  - 23 three-letter names listed in the same order as aa[].
 *   wtaa[] - 23 float values listed in the same order; presumably the
 *            per-residue weights used for the molecular weight - TODO confirm,
 *            the code that reads them is not present in this text.
 *   naac[], taac[] - 23-element int arrays; their use is not visible here
 *            (presumably per-sequence and running-total counts - TODO confirm).
 *
 * main(argc, argv), as far as it is visible:
 *   - Copies argv[1] into fname with strncpy(fname, argv[1], 120); with no
 *     argument it prints a usage message to stderr and calls exit(1).
 *     NOTE(review): fname is 120 bytes, so an argument of 120 or more
 *     characters leaves fname without a terminating NUL.
 *   - Allocates MAXSEQ bytes for aa0 with calloc, and prints a message and
 *     calls exit(1) if that fails.
 *   - Calls initmat(aa, naa).
 *   - Opens fname for reading (message and exit(1) on failure); the stdin
 *     branch is taken only when fname is the empty string.
 *   - Reads the first line of the input into lline, then zeroes t_molewt,
 *     ntt and nlib.
 *
 * NOTE(review): the text of this file appears to be damaged:
 *   - Both #include directives have no header name.
 *   - "for (ia=0; ia0) {" and "for (ia=0; ia0 && aa[iaa]>='A' ..." are not
 *     complete statements.  It looks as if text was lost at those points,
 *     including the remainder of main and the beginning of the routine that
 *     fills aascii (presumably initmat - TODO confirm).  The declaration of
 *     aascii and iaa is not visible either.
 *   - The visible tail of that routine copies the aascii entry of an
 *     upper-case letter in aa[] to the entry of the matching lower-case letter.
 *
 * fgetseq(seq, maxs, fptr), as written:
 *   - Reads lines from fptr into lline until fgets reports end of input or a
 *     line begins with '>'.
 *   - The inner loop stores the decremented ic into seq[n++] when ic > 0.
 *   - Prints " sequence may be truncated" when n == maxs, and returns n.
 *   NOTE(review): the inner loop condition is the assignment "(n=0)", which
 *   evaluates to 0, so the loop body never runs and the function returns 0.
 *   ic is also never assigned in the visible text.  The original condition
 *   has probably been lost as well - restore it from a pristine copy of the
 *   source rather than guessing.
 */
/* aacomp.c calculate the molecular wt and aa composition of a protein sequence */ #include #include char *aa0; int n0; #define MAXSEQ 10000 int naac[23], taac[23]; int naa=23; char aa[]="ACDEFGHIKLMNPQRSTVWYBZX"; char *saa[]={"Ala","Cys","Asp","Glu","Phe","Gly","His","Ile","Lys","Leu", "Met","Asn","Pro","Gln","Arg","Ser","Thr","Val","Trp","Tyr", "Asx","Glx"," ? "}; float wtaa[] = { 71.09, 103.15, 115.10, 129.13, 147.19, 57.07, 137.16, 113.17, 128.19, 113.17, 131.31, 114.12, 97.13, 128.15, 156.20, 87.09, 101.12, 99.15, 186.23, 163.19, 114.61, 128.64, 0.0 } ; FILE *aafd; char fname[120]; static char lline[1024]; main(argc,argv) int argc; char **argv; { int ia, nlib; float molewt, t_molewt; long ntt; if (argc>1) strncpy(fname,argv[1],120); else { fprintf(stderr," usage - aacomp filename\n"); exit(1); } if ((aa0=calloc((size_t)MAXSEQ,sizeof(char)))==NULL) { printf(" cannot allocate %d array\n",MAXSEQ); exit(1); } initmat(aa,naa); if (strlen(fname)>0) { if ((aafd=fopen(fname,"r"))==NULL) { printf(" cannot open %s\n",fname); exit(1); } } else aafd = stdin; fgets(lline,sizeof(lline),aafd); t_molewt = 0.0; ntt = nlib = 0; for (ia=0; ia0) { nlib++; ntt += n0; for (ia=0; ia0 && aa[iaa]>='A' && aa[iaa]<='Z') aascii[aa[iaa]-'A'+'a']=aascii[aa[iaa]]; } } fgetseq(seq,maxs,fptr) char *seq; int maxs; FILE *fptr; { int i, n; int ic; i=0; n=0; while(fgets(lline,sizeof(lline),fptr)!=0) { if (lline[0]=='>') break; for (i=0; (n=0); i++) if (ic>0) seq[n++]= --ic; } if (n==maxs) printf(" sequence may be truncated\n %d %d",n,maxs); return n; }