#!/usr/bin/make -rRf
# Run the ABySS assembler.
# Written by Shaun Jackman and
# Anthony Raymond.

#############
# MODIFIED FOR BIRCH SYSTEM TO USE A LOCALLY-INSTALLED COPY OF ZSH
############

# Default shell: bash, aborting on the first failing command and on a
# failure anywhere in a pipeline.
SHELL=bash -e -o pipefail
# Prefer the zsh under $(BIRCH)/script when it runs successfully.
# The reference must be written $(BIRCH): a bare $BIRCH is parsed by make
# as $(B) followed by the text "IRCH", and B is an assembly parameter.
# NOTE(review): BIRCH is presumably exported by the BIRCH environment;
# when it is unset the probe fails and bash stays in effect.
ifeq ($(shell $(BIRCH)/script/zsh -e -o pipefail -c 'true' 2>/dev/null; echo $$?), 0)
# Set pipefail to ensure that all commands of a pipe succeed.
SHELL=$(BIRCH)/script/zsh -e -o pipefail
# Report run time and memory usage with zsh.
export REPORTTIME=1
export TIMEFMT=time user=%U system=%S elapsed=%E cpu=%P memory=%M job=%J
endif

# Record run time and memory usage in a file using GNU time.
# gtime is only defined when the `time` parameter is set. It is a
# recursive variable so that $@ names the target of the running recipe.
ifdef time
ifneq ($(shell command -v gtime),)
gtime=command gtime -v -o $@.time
else
gtime=command time -v -o $@.time
endif
endif

# Wrapper script for commands that require an increased stack size limit
stack=abyss-stack-size 65536

# Define this environment variable on Mac OS X to read
# compressed files.
export DYLD_FORCE_FLAT_NAMESPACE=1

# The scheduler sections below only supply defaults (?=) for name, k and
# np, so values given on the command line always win.

# Integrate with Sun Grid Engine (SGE)
ifdef JOB_NAME
name?=$(JOB_NAME)
endif
ifdef SGE_TASK_ID
k?=$(SGE_TASK_ID)
endif
ifdef NSLOTS
ifneq ($(NSLOTS), 1)
np?=$(NSLOTS)
endif
endif

# Integrate with Portable Batch System (PBS)
ifdef PBS_JOBNAME
name?=$(PBS_JOBNAME)
endif
ifdef PBS_ARRAYID
k?=$(PBS_ARRAYID)
endif
ifdef PBS_NODEFILE
NSLOTS=$(shell wc -l <$(PBS_NODEFILE))
ifneq ($(NSLOTS), 1)
np?=$(NSLOTS)
endif
endif

# Integrate with Load Sharing Facility (LSF)
ifdef LSB_JOBNAME
name?=$(LSB_JOBNAME)
endif
ifdef LSB_JOBINDEX
k?=$(LSB_JOBINDEX)
endif
ifdef LSB_DJOB_NUMPROC
ifneq ($(LSB_DJOB_NUMPROC), 1)
np?=$(LSB_DJOB_NUMPROC)
endif
endif
ifdef LSF_BINDIR
mpirun?=$(LSF_BINDIR)/mpirun.lsf
endif

# Integrate with IBM LoadLeveler
ifdef LOADL_JOB_NAME
name?=$(LOADL_JOB_NAME)
endif
ifdef LOADL_STEP_ID
k?=$(LOADL_STEP_ID)
endif
ifdef LOADL_HOSTFILE
NSLOTS=$(shell wc -l <$(LOADL_HOSTFILE))
ifneq ($(NSLOTS), 1)
np?=$(NSLOTS)
endif
endif

# Integrate with SLURM
ifdef SLURM_JOB_NAME
name?=$(SLURM_JOB_NAME)
endif
ifdef SLURM_ARRAY_TASK_ID
k?=$(SLURM_ARRAY_TASK_ID)
endif
ifdef SLURM_NTASKS
np?=$(SLURM_NTASKS)
endif

# Determine the path to mpirun
mpirun?=$(shell command -v mpirun)
ifeq ($(mpirun),)
mpirun=mpirun
endif

# Use pigz or bgzip for parallel compression if available.
ifneq ($(shell command -v pigz),)
gzip=pigz -p$j
else
ifneq ($(shell command -v bgzip),)
gzip=bgzip -@$j
else
gzip=gzip
endif
endif

ifndef preserve_path
# Determine the path to the ABySS executables
path?=$(shell dirname `command -v $(MAKEFILE_LIST)`)
ifdef path
PATH:=$(path):$(PATH)
endif
endif

ifdef db
# Determine the location of sqlite database
override dbopt:=--db=$(db)
# Track data details for database
library ?= ""
strain ?= ""
species ?= ""
endif

# Programs
MARKDOWN=pandoc

# map: call the function named by $(1) on every word of $(2).
map=$(foreach a,$(2),$(call $(1),$(a)))
# deref: expand the variable whose name is $1.
deref=$($1)

# With linked reads and no explicit lib, the libraries are pe plus lr.
ifdef lr
ifndef lib
lib:=$(pe) $(lr)
endif
endif

# Either derive `in` from the files of every library in `lib`, or, when
# only `in` is given, create a single library named after the assembly.
ifdef lib
in?=$(call map, deref, $(lib))
else
ifdef in
lib?=$(name)
$(lib)?=$(in)
endif
endif
pe?=$(lib)
mp?=$(pe)

# Strip spaces from the file paths
ifdef in
override in:=$(strip $(in))
endif
ifdef se
override se:=$(strip $(se))
endif
ifdef lr
override lr_reads=$(strip $(call map, deref, $(lr)))
endif

# Graph file format
graph?=dot
# g is private. Use graph instead.
override g:=$(graph)

# Number of threads
# Taken from the PE_HOSTFILE entry for this host when available, then
# from np, and finally defaulting to 2.
ifdef PE_HOSTFILE
hostname?=$(shell hostname -f)
j?=$(shell awk '$$1 == "$(hostname)" {print $$2}' $(PE_HOSTFILE))
endif
ifeq ($j,)
j:=$(np)
endif
ifeq ($j,)
j:=2
endif

# ABYSS parameters
q ?= 3
abyssopt += -k$k -q$q
ifdef K
abyssopt += -K$K
endif
ifdef e
abyssopt += -e$e
endif
ifdef E
abyssopt += -E$E
endif
ifdef t
abyssopt += -t$t
endif
ifdef c
abyssopt += -c$c
endif
ifdef kc
abyssopt += --kc=$(kc)
endif
ifdef b
abyssopt += -b$b
pbopt += -b$b
endif
ifdef Q
abyssopt += -Q$Q
endif
ifdef ss
SS=--SS
endif
abyssopt += $v

# additional params for Bloom filter assembly (`abyss-bloom-dbg`)
ifdef B
abyssopt += -b$B
ifdef H
abyssopt += -H$H
endif
ifdef j
abyssopt += -j$j
endif
ifdef x
abyssopt += -s$x
endif
else
abyssopt += $(dbopt) $(SS) --coverage-hist=coverage.hist -s $*-bubbles.fa
endif

# AdjList parameters
m?=0
alopt += $v $(dbopt) $(SS) -k$k -m$m
ifndef B
ifdef K
alopt += -K$K
endif
endif

# filtergraph parameters
ifndef B
ifdef K
fgopt += --assemble --shim-max-degree=2
endif
endif
ifdef xtip
fgopt += -t$(shell echo $k*2 |bc)
endif

# PopBubbles parameters
p?=0.9
pbopt += -p$p
ifdef a
pbopt += -a$a
endif

# Keep sequence and quality strings of read alignments
ifdef ssq
ssq_t :=-ssq
endif

# Aligner parameters
aligner?=map$(ssq_t)
ifeq ($(SS),--SS)
ifneq ($(aligner),map$(ssq_t))
$(warning warning: setting aligner to abyss-map$(ssq_t) since other aligners do not support ss=1)
override aligner=map$(ssq_t)
endif
endif
align?=abyss-$(aligner)
# $($*_l) is the per-library value of l, looked up by the pattern stem.
mapopt=$v $(dbopt) -j$j -l$($*_l) $(SS) $(ALIGNER_OPTIONS) $(MAP_OPTIONS)

# DIDA parameters
escape_quotes=$(shell echo "$(1)" | sed 's|"|\\\"|g')
ifeq ($(aligner),dida)
ifdef np
mapopt+=-n$(np)
endif
ifdef DIDA_RUN_OPTIONS
mapopt+=$(DIDA_RUN_OPTIONS)
endif
ifdef DIDA_MPIRUN
mapopt+=-m"$(call escape_quotes,$(DIDA_MPIRUN))"
endif
ifdef DIDA_OPTIONS
mapopt+=-d"$(call escape_quotes,$(DIDA_OPTIONS))"
endif
endif

# fixmate parameters
ifeq ($(align),abyss-kaligner)
fixmate?=ParseAligns
else
fixmate?=abyss-fixmate$(ssq_t)
endif
fmopt=$v $(dbopt) -l$($*_l) $(FIXMATE_OPTIONS)

# DistanceEst parameters
DistanceEst?=DistanceEst$(ssq_t)
l?=40
s?=1000
n?=10
# Give every paired-end library its own <lib>_l, <lib>_s and <lib>_n,
# defaulting to the global l, s and n.
$(foreach i,$(pe),$(eval $i_l?=$l))
$(foreach i,$(pe),$(eval $i_s?=$s))
$(foreach i,$(pe),$(eval $i_n?=$n))
override deopt=$v $(dbopt) -j$j -k$k $(DISTANCEEST_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de)

# SimpleGraph parameters
sgopt += $(dbopt) -s$s -n$n
ifdef d
sgopt += -d$d
endif

# MergePaths parameters
mpopt += $v $(dbopt) -j$j -k$k -s$s
ifdef G
mpopt += -G$G
endif

# PathOverlap parameters
poopt += $v $(dbopt) -k$k

# PathConsensus parameters
pcopt += $(dbopt)
ifdef a
pcopt += -a$a
endif
pcopt += -p$p

# MergeContigs parameters
mcopt += $v $(dbopt) -k$k

# Scaffold parameters
L?=$l
S?=1000-10000
N?=$n
# SCAFFOLD_DE_S is the lower bound of the range S (text before the dash).
SCAFFOLD_DE_S?=$(shell echo $S | sed 's/-.*//')
SCAFFOLD_DE_N?=$N
SCAFFOLD_DE_OPTIONS?=$(DISTANCEEST_OPTIONS)
# Per-library defaults for the mate-pair libraries. These are ?= and so
# do not replace values already set for a library listed in pe.
$(foreach i,$(mp),$(eval $i_l?=$L))
$(foreach i,$(mp),$(eval $i_s?=$(SCAFFOLD_DE_S)))
$(foreach i,$(mp),$(eval $i_n?=$(SCAFFOLD_DE_N)))
override scaffold_deopt=$v $(dbopt) --dot --median -j$j -k$k $(SCAFFOLD_DE_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de)
scopt += $v $(dbopt) $(SS) -k$k
ifdef G
scopt += -G$G
endif

# abyss-fac parameters
ifdef G
override facopt = -G$G
endif

# BWA-SW parameters
bwaswopt=-t$j
BWASW_OPTIONS='-b9 -q16 -r1 -w500'

# Remove environment variables
unexport in se $(lib) $(pe) $(mp) $(long)

# Check the mandatory parameters
# When a parameter is missing, `error` is defined ahead of `default` and
# so becomes the first target in the file, i.e. the default goal.
ifndef name
error::
	@>&2 echo 'abyss-pe: missing parameter `name`'
endif
ifndef k
error::
	@>&2 echo 'abyss-pe: missing parameter `k`'
endif
ifeq ($(lib)$(in)$(se)$(lr),)
error::
	@>&2 echo 'abyss-pe: missing parameter `lib`, `in`, `se`, or `lr`'
endif

default:

error::
	@>&2 echo 'Try `abyss-pe help` for more information.'
	@false

# Help and version messages

help:
	@echo 'Usage: abyss-pe [OPTION]... [PARAMETER=VALUE]... [COMMAND]...'
	@echo 'Assemble reads into contigs and scaffolds. ABySS is a de novo'
	@echo 'sequence assembler intended for short paired-end reads and large'
	@echo 'genomes. See the abyss-pe man page for documentation of assembly'
	@echo 'parameters and commands. abyss-pe is a Makefile script, and so'
	@echo 'options of `make` may also be used with abyss-pe. See the `make`'
	@echo 'man page for documentation.'
	@echo
	@echo 'Report bugs to https://github.com/bcgsc/abyss/issues or abyss-users@bcgsc.ca.'

version:
	@echo "abyss-pe (ABySS) 2.1.5"
	@echo "Written by Shaun Jackman and Anthony Raymond."
	@echo
	@echo "Copyright 2012 Canada's Michael Smith Genome Science Centre"

# Print the version of every program used by the pipeline. The `-` prefix
# on ABYSS-P and mpirun lets make carry on when those commands fail.
versions: version
	@echo PATH=$(PATH)
	@ABYSS --version; echo
	@-ABYSS-P --version; echo
	@AdjList --version; echo
	@DistanceEst --version; echo
	@MergeContigs --version; echo
	@MergePaths --version; echo
	@Overlap --version; echo
	@PathConsensus --version; echo
	@PathOverlap --version; echo
	@PopBubbles --version; echo
	@SimpleGraph --version; echo
	@abyss-fac --version; echo
	@abyss-filtergraph --version; echo
	@abyss-fixmate --version; echo
	@abyss-map --version; echo
	@abyss-scaffold --version; echo
	@abyss-sealer --version; echo
	@abyss-todot --version; echo
	@$(align) --version; echo
	@awk --version; echo
	@sort --version; echo
	@-mpirun --version

# Determine the default target
# `default` accumulates prerequisites according to which inputs were given.
ifdef db
default: startDb
endif
default: unitigs
ifneq ($(in),)
default: contigs contigs-graph
endif
ifneq ($(mp),)
default: scaffolds scaffolds-graph
ifneq ($(long),)
default: long-scaffs long-scaffs-graph
endif
endif
ifdef db
default: finishDb
endif
default: stats

# Define the commands (phony targets)

unitigs: $(name)-unitigs.fa

unitigs-graph: $(name)-unitigs.$g

pe-index: $(name)-3.fa.fm

pe-sam: $(addsuffix -3.sam.gz, $(pe))

pe-bam: $(addsuffix -3.bam.bai, $(pe))

contigs: $(name)-contigs.fa

contigs-graph: $(name)-contigs.$g

mp-index: $(name)-6.fa.fm

mp-sam: $(addsuffix -6.sam.gz, $(mp))

mp-bam: $(addsuffix -6.bam.bai, $(mp))

scaffolds: $(name)-scaffolds.fa

scaffolds-graph: $(name)-scaffolds.$g

seal-scaffolds: $(name)-scaffolds-sealed.fa

scaftigs: $(name)-scaftigs.fa $(name)-scaftigs.agp

long-scaffs: $(name)-long-scaffs.fa

long-scaffs-graph: $(name)-long-scaffs.$g

all: default bam stats

clean:
	rm -f *.adj *.asqg *.dot *.gfa *.sam *.txt \
		*.sam.gz *.hist *.dist *.path *.path[123]

ifdef db
.PHONY: startDb finishDb
endif

# `env` is a command, not a file, so it is declared phony with the rest.
.PHONY: bam default stats \
	unitigs unitigs-graph \
	pe-index pe-sam pe-bam contigs contigs-graph \
	mp-index mp-sam mp-bam scaffolds scaffolds-graph \
	scaftigs long-scaffs long-scaffs-graph seal-scaffolds \
	all clean help version versions env
.DELETE_ON_ERROR:
.SECONDARY:

# Utilities

%.fa.fai: %.fa
	abyss-index $v --fai $<

%.fa.fm: %.fa
	abyss-index $v $<

%.bam: %.sam.gz
	samtools view -Sb $< -o $@

%.bam.bai: %.bam
	samtools index $<

# Assemble unitigs

# Write name.txt and db.txt describing this run for the database.
# Continuation lines start with exactly one tab: make strips it, and the
# shell then joins the pieces with no whitespace in between.
ifdef db
startDb:
	@echo -e \
	$(name)".sqlite\n"\
	> name.txt
	@echo -e \
	$(shell echo `date +%s`"_"`whoami`_)\
	$(shell echo $$RANDOM \% 1000 \+ 1 | bc)"\n"\
	$(library)"\n"\
	$(strain)"\n"\
	$(species)"\n"\
	$(name)"\n"\
	$(k)"\n"\
	$(lib)$(in)$(se)\
	> db.txt
endif

# Choose the unitig assembler: Bloom filter (B), paired de Bruijn graph
# (K), MPI (np) or the serial ABYSS program.
ifdef B
%-1.fa:
	$(gtime) $(stack) abyss-bloom-dbg $(abyssopt) $(ABYSS_OPTIONS) $(in) $(se) > $@
else ifdef K
ifdef np
%-1.fa:
	$(gtime) $(mpirun) -np $(np) abyss-paired-dbg-mpi $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa $(in) $(se)
else
%-1.fa %-1.$g:
	$(gtime) abyss-paired-dbg $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa -g $*-1.$g $(in) $(se)
endif
else ifdef np
%-1.fa:
	$(gtime) $(mpirun) -np $(np) ABYSS-P $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se)
else
%-1.fa:
	$(gtime) ABYSS $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se)
endif

# Find overlapping contigs

%-1.$g: %-1.fa
	$(gtime) AdjList $(alopt) --$g $< >$@

# Remove shim contigs

%-2.$g1 %-1.path: %-1.$g %-1.fa
	$(gtime) abyss-filtergraph $v --$g $(fgopt) $(FILTERGRAPH_OPTIONS) -k$k -g $*-2.$g1 $^ >$*-1.path

%-2.fa %-2.$g: %-1.fa %-2.$g1 %-1.path
	$(gtime) MergeContigs --$g $(mcopt) -g $*-2.$g -o $*-2.fa $^

# Pop bubbles

%-2.path %-3.$g: %-2.fa %-2.$g
	$(gtime) PopBubbles $v --$g -j$j -k$k $(SS) $(pbopt) $(POPBUBBLES_OPTIONS) -g $*-3.$g $^ >$*-2.path

# The second command also writes <stem>-indel.fa from the -2.path and
# -1.fa files.
%-3.fa: %-2.fa %-2.$g %-2.path
	$(gtime) MergeContigs $(mcopt) -o $@ $^
	awk '!/^>/ {x[">" $$1]=1; next} {getline s} $$1 in x {print $$0 "\n" s}' \
		$*-2.path $*-1.fa >$*-indel.fa

%-unitigs.fa: %-3.fa
	ln -sf $< $@

%-unitigs.$g: %-3.$g
	ln -sf $< $@

# Estimate distances between unitigs
# In these rules the stem is a library name and $($*) is its read files.

%-3.sam.gz %-3.hist: $(name)-3.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-3.hist \
		|sort -snk3 -k4 \
		|$(gzip) >$*-3.sam.gz

%-3.bam %-3.hist: $(name)-3.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-3.hist \
		|sort -snk3 -k4 \
		|samtools view -Sb - -o $*-3.bam

%-3.dist: %-3.sam.gz %-3.hist
	gunzip -c $< \
		|$(gtime) $(DistanceEst) $(deopt) -o $@ $*-3.hist

%-3.dist: %-3.bam %-3.hist
	$(gtime) samtools view -h $< \
		|$(DistanceEst) $(deopt) -o $@ $*-3.hist

%-3.dist: $(name)-3.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-3.hist \
		|sort -snk3 -k4 \
		|$(DistanceEst) $(deopt) -o $@ $*-3.hist

dist=$(addsuffix -3.dist, $(pe))

# Merge the per-library estimates unless the only library is already
# named after the assembly.
ifneq ($(name)-3.dist, $(dist))
$(name)-3.dist: $(name)-3.fa $(dist)
	$(gtime) abyss-todot $v --dist -e $^ >$@

$(name)-3.bam: $(addsuffix -3.bam, $(pe))
	$(gtime) samtools merge -r $@ $^
endif

# Find overlaps between contigs

%-4.fa %-4.$g: %-3.fa %-3.$g %-3.dist
	$(gtime) Overlap $v --$g $(SS) $(OVERLAP_OPTIONS) -k$k -g $*-4.$g -o $*-4.fa $^

# Assemble contigs

%-4.path1: %-4.$g %-3.dist
	$(gtime) $(stack) SimpleGraph $v $(sgopt) $(SIMPLEGRAPH_OPTIONS) -j$j -k$k -o $@ $^

%-4.path2: %-4.path1 %-3.fa.fai %-4.fa.fai
	cat $*-3.fa.fai $*-4.fa.fai \
		|$(gtime) MergePaths $(mpopt) $(MERGEPATHS_OPTIONS) -o $@ - $<

%-4.path3: %-4.$g %-4.path2
	PathOverlap --assemble $(poopt) $(SS) $^ >$@

# Without cs, build the -5 files with PathConsensus; with cs, link them
# to the -4 files and convert colour-space contigs with KAligner.
ifndef cs

%-5.path %-5.fa %-5.$g: %-3.fa %-4.fa %-4.$g %-4.path3
	cat $(wordlist 1, 2, $^) \
		|$(gtime) $(stack) PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -o $*-5.path -s $*-5.fa -g $*-5.$g - $(wordlist 3, 4, $^)

%-6.fa: %-3.fa %-4.fa %-5.fa %-5.$g %-5.path
	cat $(wordlist 1, 3, $^) |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 4, 5, $^)

else

%-5.$g %-5.path: %-4.$g %-4.path3
	ln -sf $*-4.$g $*-5.$g
	ln -sf $*-4.path3 $*-5.path

%-cs.fa: %-3.fa %-4.fa %-4.$g %-4.path3
	cat $(wordlist 1, 2, $^) |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^)

# Convert colour-space sequence to nucleotides

%-6.fa: %-cs.fa
	$(gtime) KAligner $v --seq -m -j$j -l$l $(in) $(se) $< \
		|Consensus $v -o $@ $<

endif

%-6.$g: %-5.$g %-5.path
	$(gtime) PathOverlap --overlap $(poopt) --$g $^ >$@

%-contigs.fa: %-6.fa
	ln -sf $< $@

%-contigs.$g: %-6.$g
	ln -sf $< $@

# Estimate distances between contigs

%-6.sam.gz %-6.hist: $(name)-6.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-6.hist \
		|sort -snk3 -k4 \
		|$(gzip) >$*-6.sam.gz

%-6.bam %-6.hist: $(name)-6.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-6.hist \
		|sort -snk3 -k4 \
		|samtools view -Sb - -o $*-6.bam

%-6.dist.dot: %-6.sam.gz %-6.hist
	gunzip -c $< \
		|$(gtime) $(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist

%-6.dist.dot: %-6.bam %-6.hist
	samtools view -h $< \
		|$(gtime) $(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist

%-6.dist.dot: $(name)-6.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-6.hist \
		|sort -snk3 -k4 \
		|$(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist

# Scaffold

%-6.path: $(name)-6.$g $(addsuffix -6.dist.dot, $(mp))
	$(gtime) abyss-scaffold $(scopt) -s$S -n$N -g $@.dot $(SCAFFOLD_OPTIONS) $^ >$@

%-7.path %-7.$g %-7.fa: %-6.fa %-6.$g %-6.path
	$(gtime) $(stack) PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-7.fa -g $*-7.$g -o $*-7.path $^

%-8.fa: %-6.fa %-7.fa %-7.$g %-7.path
	cat $(wordlist 1, 2, $^) \
		|$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^)

%-8.$g: %-7.$g %-7.path
	$(gtime) PathOverlap --overlap $(poopt) --$g $^ >$@

# Scaffold using linked reads
ifdef lr

# Tigmint

# Options for mapping the reads to the draft assembly.
lr_l?=$l
override lrmapopt=$v -j$j -l$(lr_l) $(LR_MAP_OPTIONS)

# Options for abyss-scaffold
lr_s?=1000-100000
lr_n?=5-20

# Minimum AS/Read length ratio
tigmint_as?=0.65

# Maximum number of mismatches
tigmint_nm?=5

# Minimum mapping quality threshold
tigmint_mapq?=0

# Maximum distance between reads to be considered the same molecule
tigmint_d?=50000

# Minimum number of spanning molecules
tigmint_n?=10

# Size of the window that must be spanned by molecules
tigmint_w?=1000

# Align paired-end reads to the draft genome, sort by BX tag,
# and create molecule extents BED.
%.lr.bed: %.fa.fai
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $*.fa \
		| samtools sort -@$j -tBX -l0 -T$$(mktemp -u -t $@.XXXXXX) \
		| tigmint-molecule -a $(tigmint_as) -n $(tigmint_nm) -q $(tigmint_mapq) -d $(tigmint_d) - \
		| sort -k1,1 -k2,2n -k3,3n >$@

# Align paired-end reads to the draft genome and sort by BX tag.
%.lr.sortbx.bam: %.fa.fai
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $*.fa \
		| samtools sort -@$j -tBX -T$$(mktemp -u -t $@.XXXXXX) -o $@

# Filter the BAM file, create molecule extents BED.
%.lr.bed: %.lr.sortbx.bam
	$(gtime) tigmint-molecule -a $(tigmint_as) -n $(tigmint_nm) -q $(tigmint_mapq) -d $(tigmint_d) $< \
		| sort -k1,1 -k2,2n -k3,3n >$@

# Cut sequences at assembly errors.
%.tigmint.fa: %.lr.bed %.fa %.fa.fai
	$(gtime) tigmint-cut -p$j -n$(tigmint_n) -w$(tigmint_w) -o $@ $*.fa $<

# ARCS

arcs_c?=2
arcs_d?=0
arcs_e?=30000
arcs_l?=0
arcs_m?=4-20000
arcs_r?=0.05
arcs_s?=98
arcs_z?=500

# Align reads and create a graph of linked contigs using ARCS.
%.arcs.dist.gv: %.fa
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $< \
		| abyss-fixmate-ssq --all --qname $v -l$(lr_l) $(FIXMATE_OPTIONS) \
		| arcs $v -c$(arcs_c) -d$(arcs_d) -e$(arcs_e) -l$(arcs_l) -m$(arcs_m) -r$(arcs_r) -s$(arcs_s) -z$(arcs_z) \
			-g $*.arcs.dist.gv --tsv=$*.arcs.tsv --barcode-counts=$*.arcs.barcode-counts.tsv /dev/stdin

# Align paired-end reads to the draft genome and do not sort.
%.lr.sortn.sam.gz: %.fa
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $< \
		| abyss-fixmate-ssq --all --qname $v -l$(lr_l) $(FIXMATE_OPTIONS) \
		| $(gzip) >$@

# Create a graph of linked contigs using ARCS.
%.arcs.dist.gv: %.lr.sortn.sam.gz
	gunzip -c $< \
		|$(gtime) arcs $v -c$(arcs_c) -d$(arcs_d) -e$(arcs_e) -l$(arcs_l) -m$(arcs_m) -r$(arcs_r) -s$(arcs_s) -z$(arcs_z) \
			-g $*.arcs.dist.gv --tsv=$*.arcs.tsv --barcode-counts=$*.arcs.barcode-counts.tsv /dev/stdin

# Scaffold using ARCS and abyss-scaffold.
%.arcs.path: %.arcs.dist.gv
	$(gtime) abyss-scaffold $(scopt) -s$(lr_s) -n$(lr_n) -g $@.dot $(LR_SCAFFOLD_OPTIONS) $< >$@

# Create the FASTA file of ARCS scaffolds.
%.arcs.fa: %.fa %.arcs.path
	$(gtime) MergeContigs $(mcopt) -o $@ $^

%-scaffolds.fa: %-8.tigmint.arcs.fa
	ln -sf $< $@

else

%-scaffolds.fa: %-8.fa
	ln -sf $< $@

endif

%-scaffolds.$g: %-8.$g
	ln -sf $< $@

# Sealed Scaffold

sealer_ks?=-k90 -k80 -k70 -k60 -k50 -k40 -k30

# abyss-sealer is given the output prefix <stem>-8 here.
# NOTE(review): presumably it then writes <stem>-8_scaffold.fa; confirm.
%-8_scaffold.fa: %-8.fa
	$(gtime) abyss-sealer -v -j$j --print-flanks -o$*-8 -S$< $(sealer_ks) $(SEALER_OPTIONS) $(in) $(se)

# Use -f like every other symlink rule, so that rebuilding does not fail
# when the link already exists.
%-scaffolds-sealed.fa: %-8_scaffold.fa
	ln -sf $< $@

# Scaftig

%-scaftigs.fa: %-scaffolds-agp.fa
	ln -sf $< $@

%-scaftigs.agp: %-scaffolds.agp
	ln -sf $< $@

# Transcriptome assisted scaffolding

%.fa.bwt: %.fa
	$(gtime) bwa index $<

%-8.sam.gz: $(name)-8.fa.bwt
	$(gtime) bwa mem -a -t$j -S -P -k$l $(name)-8.fa $(strip $($*)) \
		|$(gzip) >$@

%-8.dist.dot: %-8.sam.gz
	$(gtime) abyss-longseqdist -k$k $(LONGSEQDIST_OPTIONS) $< \
		|grep -v "l=" >$@

%-8.path: $(name)-8.$g $(addsuffix -8.dist.dot, $(long))
	$(gtime) abyss-scaffold $(scopt) -s$S -n1 -g $@.$g $(SCAFFOLD_OPTIONS) $^ >$@

%-9.path %-9.$g %-9.fa: %-8.fa %-8.$g %-8.path
	$(gtime) $(stack) PathConsensus $v --$g -k$k $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-9.fa -g $*-9.$g -o $*-9.path $^

%-10.fa: %-8.fa %-9.fa %-9.$g %-9.path
	cat $(wordlist 1, 2, $^) \
		|$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^)

%-10.$g: %-9.$g %-9.path
	$(gtime) PathOverlap --overlap $(poopt) --$g $^ >$@
%-long-scaffs.fa: %-10.fa
	ln -sf $< $@

%-long-scaffs.$g: %-10.$g
	ln -sf $< $@

ifdef db
finishDb:
	@rm -f db.txt
endif

# Create the final BAM file
# `bam` uses the most advanced assembly stage that the inputs allow.

ifneq ($(mp),)
bam: $(name)-scaffolds.bam.bai
else
ifneq ($(in),)
bam: $(name)-contigs.bam.bai
else
bam: $(name)-unitigs.bam.bai
endif
endif

$(name)-unitigs.bam: %.bam: %.fa
	$(gtime) $(align) $v -j$j -l$l $(ALIGNER_OPTIONS) $(se) $< \
		|samtools view -Su - |samtools sort -o - - >$@

# $(sort ...) removes duplicate library names before their read files
# are looked up.
$(name)-contigs.bam $(name)-scaffolds.bam: %.bam: %.fa
	$(gtime) $(align) $v -j$j -l$l $(ALIGNER_OPTIONS) \
		$(call map, deref, $(sort $(lib) $(pe) $(mp))) $< \
		|$(fixmate) $v $(FIXMATE_OPTIONS) \
		|sort -snk3 -k4 \
		|samtools view -Sb - >$@

# Align the variants to the assembly

# <(...) is process substitution, which the bash/zsh set in SHELL provide.
%-variants.bam: %.fa.bwt
	$(gtime) bwa bwasw -t$j $*.fa <(cat $(name)-bubbles.fa $(name)-indel.fa) \
		|samtools view -Su - |samtools sort -o - - >$@

%-variants.vcf.gz: %.fa %-variants.bam
	$(gtime) samtools mpileup -Buf $^ |bcftools view -vp1 - |bgzip >$@

%.gz.tbi: %.gz
	$(gtime) tabix -pvcf $<

# Calculate assembly contiguity statistics

stats: $(name)-stats.tab $(name)-stats $(name)-stats.csv $(name)-stats.md
ifneq ($(shell command -v $(MARKDOWN)),)
stats: $(name)-stats.html
endif

%-stats: %-stats.tab
	ln -sf $< $@

# The static pattern rules below only add prerequisites; the recipe at the
# end then receives every selected assembly file in $^.
$(name)-stats.tab: %-stats.tab: %-unitigs.fa
ifneq ($(in),)
$(name)-stats.tab: %-stats.tab: %-contigs.fa
endif
ifneq ($(mp),)
$(name)-stats.tab: %-stats.tab: %-scaffolds.fa
endif
ifneq ($(long),)
$(name)-stats.tab: %-stats.tab: %-long-scaffs.fa
endif
$(name)-stats.tab:
	abyss-fac $(facopt) $(FAC_OPTIONS) $^ |tee $@

%.csv: %.tab
	tr '\t' , <$< >$@

%.md: %.tab
	abyss-tabtomd $< >$@

%.html: %.md
	$(MARKDOWN) $< >$@

# Create an AGP file and FASTA file of scaftigs from scaffolds

%.agp %-agp.fa: %.fa
	$(gtime) abyss-fatoagp $(FATOAGP_OPTIONS) -f $*-agp.fa $< >$*.agp

# Align the contigs to the reference
# $(ref) names a variable whose value is passed to bwa before the contigs.

%-$(ref).sam.gz: %.fa
	$(gtime) bwa bwasw $(bwaswopt) $(BWASW_OPTIONS) $($(ref)) $< |$(gzip) >$@

# Find breakpoints in the alignments

%.break: %.sam.gz
	$(gtime) abyss-samtobreak $(SAMTOBREAK_OPTIONS) $< >$@

# Report ABySS configuration variable(s) and value(s) currently set.
# The list names ssq_t, the variable this makefile sets when ssq is given.

override varList := a b c d e E G j k l m n N p q s S t v cs pi \
	np pe lib mp se SS hostname xtip \
	ssq ssq_t path name in mpirun \
	aligner long ref fixmate DistanceEst \
	map deref abyssopt fgopt pbopt \
	align mapopt fmopt deopt \
	pcopt sgopt bwaswopt \
	ABYSS_OPTIONS FILTERGRAPH_OPTIONS POPBUBBLES_OPTIONS \
	OVERLAP_OPTIONS SIMPLEGRAPH_OPTIONS MERGEPATHS_OPTIONS \
	SCAFFOLD_OPTIONS ALIGNER_OPTIONS MAP_OPTIONS FAC_OPTIONS \
	FIXMATE_OPTIONS BWASW_OPTIONS FATOAGP_OPTIONS SAMTOBREAK_OPTIONS \
	MARKDOWN

# Print each variable in varList with its value and its $(origin).
env:
	@echo 'List of ABySS configuration variables currently set:'
	@echo '[environment], if variable was inherited from the environment.'
	@echo '[command line], if variable was defined on the command line.'
	@echo '[file], if variable was defined in (this) makefile.'
	@echo '[override], if variable was defined with an override directive in (this) makefile.'
	@$(foreach var,$(varList),\
		echo -e $(var)" = "$($(var))"\t["$(origin $(var))"]";)