/** ** Copyright (c) 2007-2010 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** @file SmallAssembler.hh ** ** @brief Stores default params for the SmallAssembler in Grouper. ** ** @author Tony Cox, Ole Schulz-Trieglaff **/ #ifndef SMALLASSEMBLER_HH_ #define SMALLASSEMBLER_HH_ // CASAVA includes #include "applications/Application.hh" #include "common/Alignment.hh" #include "common/SequenceUtils.hh" #include "applications/SmallAssembler.hh" #include "assembly/SmallAssemblerImpl.hh" // boost #include // STL #include #include #include #include #include //#define DEBUG_ASBL namespace ca { namespace applications { /** * @class SmallAssembler * * @brief Performs a simple de-novo assembly using the groups of * shadow reads found by the indelfinder. * * Details of implementation are in c++/assembler/SmallAssemblerImpl * * We build a hash of the k-mers in the shadow reads. The most * frequent k-mer is used as seed and is then gradually extended. * * */ class SmallAssembler : public Application { public: // Tokenizer class for parsing typedef boost::tokenizer > LineTokenizer; // Vector of shadow reads in export file format. Also contains the approx. read coordinates. typedef std::vector< std::pair< std::pair, Alignment > > ShadowAlignmentVec; struct AssemblerStats { AssemblerStats() : numClusters(0), numContigs(0),clustLen(0),numSeeds(0),others(0) {} // number of shadow read cluster unsigned numClusters; // number of assembled contigs unsigned numContigs; // contigs discarded because of length unsigned clustLen; // contigs discarded because of too little reads unsigned numSeeds; // contigs discarded because of other reasons unsigned others; }; SmallAssembler(CasavaOptions &options) : Application(options){ } virtual ~SmallAssembler() {} /** * Reads the data and starts the assembly * * @return status */ int run(); private: void process_cluster(const SmallAssemblerOptions& asmOptions, AssemblerStats& asmStats, SmallAssemblerImpl::ShadowReadVec& shadows, ShadowAlignmentVec& shadowLines, const std::string& cluster_info, std::ostream& creads_os, std::ostream& contigs_os); void updateAlignment(Alignment& al, const int pos, const unsigned numContigs, const std::string& cseq); }; }} // end namespace casava{ namespace { applications #endif /* SMALLASSEMBLER_HH_ */