/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include #include #include #include "bool.h" #include "readPrimerScreenSequences.h" #include #define nMaxLineSize 200 void readPrimerScreenSequences( const RWCString& soPrimerFileOfSequences, char*** ppszSequence, typeName** ppszSequenceName, int** ppnLengthOfSequence, int* pnNumberOfSequences, bool* pbError, char* szErrorMessage ) { FILE *pFile; pFile = fopen( (char*) soPrimerFileOfSequences.data(), "r" ); if (pFile == NULL ) { sprintf( szErrorMessage, "Could not open primer screen file %s defined by Consed resources consed.primersSubcloneFullPathnameOfFileOfSequencesForScreening or consed.primersCloneFullPathnameOfFileOfSequencesForScreening Please check the permissions on this file. You probably did not correctly install consed. For more information on this, click the Help menu in consed and search for INSTALLING CONSED. Complete all the steps, including those dealing with primerCloneScreen.seq and primerSubcloneScreen.seq", (char*) soPrimerFileOfSequences.data() ); *pbError = true; return; } char szLine[nMaxLineSize]; if ( fgets( szLine, nMaxLineSize, pFile ) == NULL ) { sprintf( szErrorMessage, "The vector file %s was empty", (char*) soPrimerFileOfSequences.data() ); *pbError = true; return; } // We already found one sequence--the one on the 1st line *pnNumberOfSequences = 1; if ( szLine[0] != '>' ) { sprintf( szErrorMessage, "The vector file %s must be in FASTA format. The bases must be preceded by a line of the form >(sequence name) where the > must be included", (char*) soPrimerFileOfSequences.data() ); *pbError = true; return; } while( fgets(szLine, nMaxLineSize, pFile) != NULL ) { if (szLine[0] == '>' ) ++*pnNumberOfSequences; } rewind( pFile ); typeName* pszSequenceName = (typeName*) malloc( sizeof(typeName ) * *pnNumberOfSequences ); int* pnLengthOfSequence = (int*) malloc( sizeof(int) * *pnNumberOfSequences ); char** pszSequence = (char**) malloc( sizeof(char*) * *pnNumberOfSequences ); if ( ! pszSequenceName ) { sprintf( szErrorMessage, "could not malloc enough memory" ); *pbError = true; return; } int nSequenceNumber = -1; while( fgets( szLine, nMaxLineSize, pFile ) != NULL ) { if (szLine[0] == '>' ) { ++nSequenceNumber; strcpy( pszSequenceName[ nSequenceNumber ], &(szLine[1]) ); pnLengthOfSequence[ nSequenceNumber ] = 0; } else { for( int nPos = 0; nPos < strlen( szLine ); ++nPos ) { if ( isalpha( szLine[nPos] ) ) ++pnLengthOfSequence[ nSequenceNumber ]; } } } for( nSequenceNumber = 0; nSequenceNumber < *pnNumberOfSequences; ++nSequenceNumber ) { pszSequence[ nSequenceNumber ] = (char*) malloc( ( pnLengthOfSequence[ nSequenceNumber ] + 1) * sizeof( char ) ); if (! pszSequence[ nSequenceNumber ] ) { sprintf( szErrorMessage, "could not malloc enough memory" ); *pbError = true; return; } } rewind( pFile ); char* szPtr; nSequenceNumber = -1; while( fgets( szLine, nMaxLineSize, pFile) != NULL ) { if ( szLine[0] == '>' ) { if (nSequenceNumber >= 0 ) *szPtr = '\0'; ++nSequenceNumber; szPtr = pszSequence[ nSequenceNumber ]; } else { if (nSequenceNumber >= 0 ) { for( int nPos = 0; nPos < strlen( szLine); ++nPos ) { if ( isalpha( szLine[nPos] ) ) { *szPtr = tolower( szLine[ nPos ] ); ++szPtr; } } } } } *szPtr = '\0'; for( nSequenceNumber = 0; nSequenceNumber < *pnNumberOfSequences; ++nSequenceNumber ) { if ( pnLengthOfSequence[ nSequenceNumber ] != strlen( pszSequence[ nSequenceNumber ] ) ) { sprintf( szErrorMessage, "inconsistency between pnLengthOfSequence %d and pszSequence %d for sequence number %d", pnLengthOfSequence[ nSequenceNumber ], strlen( pszSequence[ nSequenceNumber ] ), nSequenceNumber ); *pbError = true; return; } } fclose( pFile ); *ppszSequenceName = pszSequenceName; *ppnLengthOfSequence = pnLengthOfSequence; *ppszSequence = pszSequence; *pbError = false; }