/** ** Copyright (c) 2007-2010 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** **/ #ifndef GA_COMMON_FILE_TYPE_METADATA_HH #define GA_COMMON_FILE_TYPE_METADATA_HH #include #include #include #include #include #include #include #include #include #include #include "common/Compression.hh" namespace fs = boost::filesystem; namespace casava { namespace common { /** ** \brief Global provider of metadata for the different file types ** used in the Data Analysis Pipeline. ** ** This type has two main responsibilities: ** ** - strictly controlled and type-safe use of file types ** - centralized definition of the metadata. ** ** The organization of data files varies across the pipeline. Some ** files contain data for only one tile, others aggregate all the ** tiles for a single line. In an orthogonal way, some files contain ** only the information for a single read, other file types aggregate ** the data for all the reads. This gives us two orthogonal ways to ** classify the data: ** ** - tile or lane level ** - splits or joins the reads ** ** Adding a new file type requires the following steps: ** ** - declare the file type T in the FileTypeEnum ** - define a user-friendly name for FileType ** - add FileType in the appropriate file vector ** - define the associated string litteral in the compilation unit **/ class FileTypeMetadata { private: /** ** \brief Enumeration of the file types. ** ** Note: FIRST and LAST are used as tags to check that an integer ** represents a valid file type. **/ enum {FIRST, INT, NSE, CIF, CNF, POS, SIG2, PRB, QHG, QCM, QVAL, SEQ, QSEQ, UBQSEQ, ALL, LAST} FileTypeEnum; /** ** \brief Tag type for each file type ** ** \tparam i Should be one of the values provides by FileTypeEnum. **/ template struct FileType; public: /// \brief Intensity files typedef FileType IntFile; /// \brief Noise files typedef FileType NseFile; /// \brief Binary Intensity files typedef FileType CifFile; /// \brief Binary Noise files typedef FileType CnfFile; /// \brief Position files typedef FileType PosFile; /// \brief Qcm files typedef FileType QcmFile; /// \brief Qhg files typedef FileType QhgFile; /// \brief corrected signal files typedef FileType Sig2File; /// \brief probability files typedef FileType PrbFile; /// \brief quality values files typedef FileType QvalFile; /// \brief deprecated sequence files typedef FileType SeqFile; /// \brief "qualified sequence" files typedef FileType QseqFile; /// \brief "masked sequence" files typedef FileType UbQseqFile; /// \brief "signal means" files typedef FileType AllFile; /** ** \brief List of Tile file types aggregating the data for all reads. **/ typedef boost::mpl::vector JoinedReadTileFileList; /** ** \brief List of Tile file types splitting the reads into different files. **/ typedef boost::mpl::vector SplitReadTileFileList; /** ** \brief List of Lane file types aggregating the data for all reads. **/ typedef boost::mpl::vector JoinedReadLaneFileList; /** ** \brief List of Lane file types splitting the reads into different files. **/ typedef boost::mpl::vector<> SplitReadLaneFileList; /** ** \brief Get the full path to a qcm file. **/ template static fs::path getQcmFullPath(const fs::path &qcmDirectory, const std::string &sampleLiteral, T1 laneId, T2 tileId, T3 cycle) { const std::string laneDirectory = (boost::format("L00%d") % laneId).str(); const std::string fileName = (boost::format("%s_%d_%04d_%02d_qcm.xml") % sampleLiteral % laneId % tileId % cycle).str(); return qcmDirectory / laneDirectory / fileName; } /** ** \brief Construct the file name for a joined tile file. ** ** \tparam T must be a FileType where FIRST < I < LAST and must ** be registered in the vector SplitReadTileFileList (else a ** compilation error is generated). ** ** \return The file name (including the extension for the compression, if any). **/ template static std::string getTileFileName(TL laneId, TT tileId, const Compression &compression); /** ** \brief Construct the file name for a split tile file. ** ** \tparam T must be a FileType where FIRST < I < LAST and must ** be registered in the vector SplitReadTileFileList (else a ** compilation error is generated). ** ** \return The file name (including the extension for the compression, if any). **/ template static std::string getTileFileName(TL laneId, TR readId, TT tileId, const Compression &compression); /** ** \brief Construct the file name for a joined lane file. ** ** \tparam T must be a FileType where FIRST < I < LAST and must ** be registered in the vector SplitReadLaneFileList (else a ** compilation error is generated). ** ** \return The file name (including the extension for the compression, if any). **/ template static std::string getLaneFileName(TL laneId, const Compression &compression); /** ** \brief Construct the file name for a split lane file. ** ** \tparam T must be a FileType where FIRST < I < LAST and must ** be registered in the vector SplitReadLaneFileList (else a ** compilation error is generated). ** ** \return The file name (including the extension for the compression, if any). **/ template static std::string getLaneFileName(TL laneId, TR readId, const Compression &compression); private: /// \brief Enumeration of the organization level of the file. enum {LANE_LEVEL, TILE_LEVEL}; /// \brief Enumeration of the structure of the files regarding reads (split or joined) enum {JOINED_READS, SPLIT_READS}; /** ** \brief Generic method to name a file. ** ** Checks that the integer representation of the file type ** (T::value) is a valid value from FileTypeEnum and that ** T==FileType. A compilation error is returned if T ** does not meet both prerequisites. ** ** Checks that the type T belongs to TypeVector, otherwise, ** generates a compilation error. ** ** \param[in] laneId the identifier of the lane (strictly positive). ** ** \param[in] readId the identifier of the read (strictly ** positive). Ignored (and can be 0) if the template parameter ** ReadStructure==JOINED_READS. ** ** \param[in] tileId the identifier of the read (strictly ** positive). Ignored (and can be 0) if the template parameter ** Level==LANE_LEVEL. ** ** \param[in] compression the type of compression. ** ** \tparam Level either LANE_LEVEL or TILE_LEVEL. ** ** \tparam ReadStructure either JOINED_READS or SPLIT_READS. ** ** \tparam T must be a FileType where FIRST < I < LAST and must ** be registered in the vector SplitReadLaneFileList (else a ** compilation error is generated). ** ** \tparam TypeVector the list of types where T belongs. ** ** \tparam TL the data type used to represent the lane identifier ** (unsigned int, string, char, etc.) ** ** \tparam TR the data type used to represent the read identifier. ** ** \tparam TT the data type used to represent the tile identifier. **/ template static std::string getFileName(TL laneId, TR readId, TT tileId, const Compression &compression ) { assert (TL(0) != laneId); BOOST_MPL_ASSERT_RELATION( FIRST , <, T::value ); BOOST_MPL_ASSERT_RELATION( LAST , >, T::value ); BOOST_MPL_ASSERT( (boost::is_same, T>) ); // check that the given file type T is listed in TypeVector typedef typename boost::mpl::find::type TypeIterator; typedef typename boost::mpl::end::type TypeEnd; // If the assertion fails, check that the file type is in TypeVector BOOST_MPL_ASSERT_NOT(( boost::is_same )); std::ostringstream os; os << "s_" << laneId; if (SPLIT_READS == ReadStructure) { assert (TR(0) != readId); os << '_' << readId; } if (TILE_LEVEL == Level) { assert (TT(0) != tileId); os << '_' << std::setw(4) << std::setfill('0') << tileId; } os << '_' << T::string << ".txt" << compression.getFileNameExtension(); return os.str(); } }; template struct FileTypeMetadata::FileType { /// \brief The integer representation of the file type. static const int value = i; /// the litteral associated to the file type (Must be /// explicitely defined in the compilation unit). static const char string[]; }; template std::string FileTypeMetadata::getTileFileName(TL laneId, TT tileId, const Compression &compression) { return getFileName(laneId, 0, tileId, compression); } template std::string FileTypeMetadata::getTileFileName(TL laneId, TR readId, TT tileId, const Compression &compression) { return getFileName(laneId, readId, tileId, compression); } template std::string FileTypeMetadata::getLaneFileName(TL laneId, const Compression &compression) { return getFileName(laneId, 0, 0, compression); } template std::string FileTypeMetadata::getLaneFileName(TL laneId, TR readId, const Compression &compression) { return getFileName(laneId, readId, 0, compression); } } } #endif // #ifndef GA_COMMON_FILE_TYPE_METADATA_HH