#@UGENE_WORKFLOW
#ChIP-seq analysis is started from MACS tool. CEAS then takes peak regions and signal wiggle file to check which chromosome is enriched with binding/modification sites, whether binding events are significant at gene features like promoters, gene bodies, exons, introns or UTRs, and the signal aggregation at gene transcription start/end sites or meta-gene bodies (average all genes). Then peaks are investigated in these ways: 1. to check which genes are nearby so they can be regarded as potential regulated genes, then perform GO analysis; 2. to check the conservation scores at the binding sites; 3. to check the DNA motifs at binding sites.

workflow "ChIP-seq analysis with control"{
    macs-id {
        type:macs-id;
        name:"Find Peaks with MACS";
        output-dir:tools_output;
    }
    ceas-report {
        type:ceas-report;
        name:"Create CEAS Report";
        anns-file:ceas_annotations.xls;
        image-file:ceas_report.pdf;
    }
    conservation_plot-id {
        type:conservation_plot-id;
        name:"Build Conservation Plot";
        output-file:conservation.bmp;
    }
    seqpos-id {
        type:seqpos-id;
        name:"Collect Motifs with SeqPos";
        output-dir:tools_output;
    }
    peak2gene-id {
        type:peak2gene-id;
        name:"Annotate peaks with Peak2gene";
    }
    conduct-go-id {
        type:conduct-go-id;
        name:"Conduct GO";
        output-dir:tools_output;
    }
    multiplexer {
        type:multiplexer;
        name:Multiplexer;
    }
    write-annotations {
        type:write-annotations;
        name:"Write Gene Annotations";
        document-format:bed;
        url-out:genes.bed;
    }
    write-annotations-1 {
        type:write-annotations;
        name:"Write Peak Annotations";
        document-format:bed;
        url-out:peaks.bed;
    }
    get-file-list {
        type:get-file-list;
        name:"Treatment Tags";
        url-in {
            dataset:"Dataset 1";
        }
        .validator {
            type:script;
            script {
                /*
                 * Validator for the treatment tag files.
                 * Raises a single non-blocking warning when any selected file
                 * lacks one of the extensions listed in possibleExts.
                 * Extension matching ignores letter case, so upper-case
                 * extensions are accepted as well.
                 * Only ES3/ES5 syntax is used here - presumably required by
                 * the embedded script engine; confirm before modernising.
                 */

                /* True when str, converted to a string, ends with suffix. */
                function endsWith(str, suffix) {
                    var text = str.toString();
                    var index = text.lastIndexOf(suffix);
                    return (index !== -1) && (index + suffix.length === text.length);
                }

                /* True when file ends with any entry of possibleExts.
                   possibleExts entries are expected in lower case. */
                function isExtensionValid(file, possibleExts) {
                    var lowered = file.toString().toLowerCase();
                    var i;
                    for (i = 0; i < possibleExts.length; i++) {
                        if (endsWith(lowered, possibleExts[i])) {
                            return true;
                        }
                    }
                    return false;
                }

                /* The url-in value is indexed below as a list of datasets,
                   each of which is a list of file URLs. */
                var inputDatasets = ctx.attributeValue("url-in");
                var possibleExts = [".txt", ".bam", ".sam", ".bed", ".ebwt"];
                var i;
                /* The label lets the inner loop stop the whole scan, so the
                   warning is reported at most once. */
                loops:
                for (i = 0; i < inputDatasets.length; i++) {
                    var j;
                    for (j = 0; j < inputDatasets[i].length; j++) {
                        var file = inputDatasets[i][j];
                        if (!isExtensionValid(file, possibleExts)) {
                            ctx.warning("MACS tool supports the following formats: ELAND, BED, ELANDMULTI, ELANDEXPORT, ELANDMULTIPET (for pair-end tags), SAM, BAM or BOWTIE.\nAn error may occur if input data does not correspond the requirements.");
                            break loops;
                        }
                    }
                }
            }
        }
    }
    get-file-list-1 {
        type:get-file-list;
        name:"Control Tags";
        url-in {
            dataset:"Dataset 1";
        }
        .validator {
            type:script;
            script {
                /*
                 * Validator for the control tag files.
                 * Raises a single non-blocking warning when any selected file
                 * lacks one of the extensions listed in possibleExts.
                 * Extension matching ignores letter case, so upper-case
                 * extensions are accepted as well.
                 * Only ES3/ES5 syntax is used here - presumably required by
                 * the embedded script engine; confirm before modernising.
                 */

                /* True when str, converted to a string, ends with suffix. */
                function endsWith(str, suffix) {
                    var text = str.toString();
                    var index = text.lastIndexOf(suffix);
                    return (index !== -1) && (index + suffix.length === text.length);
                }

                /* True when file ends with any entry of possibleExts.
                   possibleExts entries are expected in lower case. */
                function isExtensionValid(file, possibleExts) {
                    var lowered = file.toString().toLowerCase();
                    var i;
                    for (i = 0; i < possibleExts.length; i++) {
                        if (endsWith(lowered, possibleExts[i])) {
                            return true;
                        }
                    }
                    return false;
                }

                /* The url-in value is indexed below as a list of datasets,
                   each of which is a list of file URLs. */
                var inputDatasets = ctx.attributeValue("url-in");
                var possibleExts = [".txt", ".bam", ".sam", ".bed", ".ebwt"];
                var i;
                /* The label lets the inner loop stop the whole scan, so the
                   warning is reported at most once. */
                loops:
                for (i = 0; i < inputDatasets.length; i++) {
                    var j;
                    for (j = 0; j < inputDatasets[i].length; j++) {
                        var file = inputDatasets[i][j];
                        if (!isExtensionValid(file, possibleExts)) {
                            ctx.warning("MACS tool supports the following formats: ELAND, BED, ELANDMULTI, ELANDEXPORT, ELANDMULTIPET (for pair-end tags), SAM, BAM or BOWTIE.\nAn error may occur if input data does not correspond the requirements.");
                            break loops;
                        }
                    }
                }
            }
        }
    }

    .actor-bindings {
        get-file-list.out-url->multiplexer.input-data-1
        get-file-list-1.out-url->multiplexer.input-data-2
        multiplexer.output-data->macs-id.in-data
        peak2gene-id.out-data->conduct-go-id.in-data
        peak2gene-id.out-data->write-annotations.in-annotations
        peak2gene-id.out-data->write-annotations-1.in-annotations
        macs-id.out-data->ceas-report.in-data
        macs-id.out-data->conservation_plot-id.in-data
macs-id.out-data->seqpos-id.in-data macs-id.out-data->peak2gene-id.in-data } get-file-list.url->macs-id.in-data._treatment-ann get-file-list-1.url->macs-id.in-data.control-ann macs-id.wiggle-treat->ceas-report.in-data.enrichment-signal macs-id.peak-regions->ceas-report.in-data.peak-regions macs-id.peak-summits->conservation_plot-id.in-data.cp_treat-ann macs-id.peak-summits->seqpos-id.in-data.cp_treat-ann macs-id.peak-summits->peak2gene-id.in-data._treat-ann peak2gene-id.gene-annotation->conduct-go-id.in-data.in-ann peak2gene-id.gene-annotation->write-annotations.in-annotations.annotations peak2gene-id.peak-annotation->write-annotations-1.in-annotations.annotations .meta { parameter-aliases { macs-id.output-dir { alias:macs_out; } macs-id.tag-size { alias:tag_size; } ceas-report.anns-file { alias:ceas_annotation_out; } ceas-report.image-file { alias:ceas_report; } conservation_plot-id.output-file { alias:plot_out; } seqpos-id.output-dir { alias:seqpos_out; } conduct-go-id.output-dir { alias:GO_out; } } visual { scale:85; ceas-report { pos:"-746.412 -554.765"; style:ext; bg-color-ext:"128 128 0 64"; in-data.angle:14.3148; } conduct-go-id { pos:"-775.824 -335.471"; style:ext; bg-color-ext:"112 34 0 64"; in-data.angle:14.6383; } conservation_plot-id { pos:"-165.294 -689.353"; style:ext; bg-color-ext:"128 128 0 64"; in-data.angle:180; } get-file-list { pos:"-858.824 -851.765"; style:ext; bg-color-ext:"24 102 175 64"; bounds:"-30 -30 70.5882 66"; out-url.angle:360; } get-file-list-1 { pos:"-860 -715.294"; style:ext; bg-color-ext:"24 102 175 64"; bounds:"-30 -30 68.2353 58.9412"; out-url.angle:360; } macs-id { pos:"-453 -806"; style:ext; bg-color-ext:"0 128 0 64"; in-data.angle:220.601; out-data.angle:300.069; } multiplexer { pos:"-634.059 -781.471"; style:simple; bg-color-simple:"84 84 84 255"; input-data-1.angle:150; input-data-2.angle:210; output-data.angle:360; } peak2gene-id { pos:"-481.765 -538.176"; style:ext; bg-color-ext:"128 128 0 64"; in-data.angle:27.8321; 
out-data.angle:292.805; } seqpos-id { pos:"-173.529 -547.588"; style:ext; bg-color-ext:"128 128 0 64"; in-data.angle:180; } write-annotations { pos:"-316.412 -330.706"; style:ext; bg-color-ext:"64 26 96 64"; in-annotations.angle:53.9726; } write-annotations-1 { pos:"-539.765 -328.353"; style:ext; bg-color-ext:"64 26 96 64"; in-annotations.angle:23.9625; } get-file-list-1.out-url->multiplexer.input-data-2 { text-pos:"-29.3047 -15"; } get-file-list.out-url->multiplexer.input-data-1 { text-pos:"-31.3047 -9"; } macs-id.out-data->ceas-report.in-data { text-pos:"-79 -4"; } macs-id.out-data->conservation_plot-id.in-data { text-pos:"-12 -16"; } macs-id.out-data->peak2gene-id.in-data { text-pos:"-60 1"; } macs-id.out-data->seqpos-id.in-data { text-pos:"-18 6"; } multiplexer.output-data->macs-id.in-data { text-pos:"-47 -25"; } peak2gene-id.out-data->conduct-go-id.in-data { text-pos:"-74 -26"; } peak2gene-id.out-data->write-annotations-1.in-annotations { text-pos:"-40 -6"; } peak2gene-id.out-data->write-annotations.in-annotations { text-pos:"-9 -14"; } } estimations { function cistromeTime(tagsSize, dbCount) { return 240*tagsSize*dbCount; } var db = new String(utils.attributeValue("seqpos-id.motif_db")); var dbCount = db.split(',').length; var sets = utils.attributeValue("get-file-list.url-in"); var time = 0; var ram = 0; for (var i=0; i