## A simple example snakemake .smk file for parallelising freebayes
## Uses a fasta_generate_regions to split the genome into regions of equal size based on the .fai index
## As snakemake automatically moves each cpu core to the next genome chunk, this works out faster
## than the freebayes-parallel wrapper.
## This .smk file assumes we have a list of the bam files called bam.list
## This .smk file splits the genome by chromosome, which of course, is not necessary.
## One will want to edit the paths (for example, the path to bam files)


# these parameters should usually be stored in the snakemake configuration file (config.yaml) and accessed e.g. config['ref']
samples = ['SampleA', 'SampleB', 'SampleC']
reference = "path/to/reference"
chroms = [1,2,3]
nchunks = 9

bamlist = "path/to/bam.list"
chunks = list(range(1,nchunks+1))

rule GenomeIndex:
    input:
        ref = reference
    output:
        idx = reference + ".fai"
    log: 
        "logs/GenomeIndex.log"
    wrapper: 
        "v0.69.0/bio/samtools/faidx"


rule GenerateFreebayesRegions:
    input:
        ref_idx = reference,
        index = reference + ".fai",
        bams = expand("resources/alignments/{sample}.bam", sample=samples)
    output:
        regions = expand("resources/regions/genome.{chrom}.region.{i}.bed", chrom=chroms, i = chunks)
    log:
        "logs/GenerateFreebayesRegions.log"
    params:
        chroms = chroms,
        chunks = chunks
    conda:
        "../envs/freebayes-env.yaml"
    script:
        # "../scripts/GenerateFreebayesRegions.R" # This is located in the scripts/ directory of freebayes
        "../scripts/fasta_generate_regions.py --chunks --bed resources/regions/genome --chromosome {params.chroms} {input.index} {params.chunks}"


rule VariantCallingFreebayes:
    input:
        bams = expand("resources/alignments/{sample}.bam", sample=samples),
        index = expand("resources/alignments/{sample}.bam.bai", sample=samples),
        ref = reference,
        samples = bamlist,
        regions = "resources/regions/genome.{chrom}.region.{i}.bed"
    output:
        temp("results/variants/vcfs/{chrom}/variants.{i}.vcf")
    log:
        "logs/VariantCallingFreebayes/{chrom}.{i}.log"
    conda:
        "../envs/freebayes-env.yaml"
    threads:1
    shell:	"freebayes -f {input.ref} -t {input.regions} -L {input.samples} > {output} 2> {log}"


rule ConcatVCFs:
    input:
        calls = expand("results/variants/vcfs/{{chrom}}/variants.{i}.vcf", i=chunks)
    output:
        "results/variants/vcfs/variants.{chrom}.vcf"
    log:
        "logs/ConcatVCFs/{chrom}.log"
    conda:
        "../envs/freebayes-env.yaml"
    threads:4
    shell:  
        "bcftools concat {input.calls} | vcfuniq > {output} 2> {log}"