#@UGENE_WORKFLOW
#ChIP-seq analysis is started from MACS tool. CEAS then takes peak regions and signal wiggle file to check which chromosome is enriched with binding/modification sites, whether binding events are significant at gene features like promoters, gene bodies, exons, introns or UTRs, and the signal aggregation at gene transcription start/end sites or meta-gene bodies (average all genes). Then peaks are investigated in these ways: 1. to check which genes are nearby so they can be regarded as potential regulated genes, then perform GO analysis; 2. to check the conservation scores at the binding sites; 3. to check the DNA motifs at binding sites.

workflow "ChIP-seq analysis"{

    macs-id {
        type:macs-id;
        name:"Find Peaks with MACS";
        output-dir:tools_output;
    }
    ceas-report {
        type:ceas-report;
        name:"Create CEAS Report";
        anns-file:ceas_annotations.xls;
        image-file:ceas_report.pdf;
    }
    conservation_plot-id {
        type:conservation_plot-id;
        name:"Build Conservation Plot";
        output-file:conservation.bmp;
    }
    seqpos-id {
        type:seqpos-id;
        name:"Collect Motifs with SeqPos";
        output-dir:tools_output;
    }
    peak2gene-id {
        type:peak2gene-id;
        name:"Annotate Peaks with Peak2gene";
    }
    conduct-go-id {
        type:conduct-go-id;
        name:"Conduct GO";
        output-dir:tools_output;
    }
    write-annotations {
        type:write-annotations;
        name:"Write Gene Annotations";
        url-out:genes.gb;
    }
    write-annotations-1 {
        type:write-annotations;
        name:"Write Peak Annotations";
        url-out:peaks.gb;
    }
    get-file-list {
        type:get-file-list;
        name:"Read Tags";
        url-in {
            dataset:Dataset;
        }
        .validator {
            type:script;
            script {
                // Validator script for the Read Tags element.
                // It raises a warning when an input file name does not carry one of the
                // extensions listed in possibleExts below.
                // NOTE(review): kept to pre-ES6 syntax on the assumption that the host
                // script engine has no built-in String.prototype.endsWith - confirm.

                // Returns true when the text form of str ends with suffix.
                // str may be any value with a toString method and is converted once.
                // An empty suffix always matches.
                function endsWith(str, suffix) {
                    var text = str.toString();
                    var start = text.length - suffix.length;
                    return (start >= 0) && (text.substring(start) === suffix);
                }

                // Returns true when file ends with at least one entry of possibleExts.
                // Letter case is ignored, so a name such as reads.BAM is accepted
                // the same way as reads.bam.
                function isExtentionValid(file, possibleExts) {
                    var name = file.toString().toLowerCase();
                    var i;
                    for (i = 0; i < possibleExts.length; i++) {
                        if (endsWith(name, possibleExts[i].toLowerCase())) {
                            return true;
                        }
                    }
                    return false;
                }

                // Value of the url-in attribute. The loop below indexes it as
                // inputDatasets[dataset][file].
                var inputDatasets = ctx.attributeValue("url-in");
                // File name endings that do not trigger the warning.
                var possibleExts = [".txt", ".bam", ".sam", ".bed", ".ebwt"];
                var i;
// Main loop of the validator script. i, inputDatasets and possibleExts are declared earlier in this script.
                // Walks every file of every dataset and warns once, at the first file
                // that isExtentionValid rejects.
                loops: for (i = 0; i < inputDatasets.length; i++) {
                    var j;
                    for (j = 0; j < inputDatasets[i].length; j++) {
                        var file = inputDatasets[i][j];
                        if (isExtentionValid(file, possibleExts) == false) {
                            ctx.warning("MACS tool supports the following formats: ELAND, BED, ELANDMULTI, ELANDEXPORT, ELANDMULTIPET (for pair-end tags), SAM, BAM or BOWTIE.\nAn error may occur if input data does not correspond the requirements.");
                            // The labeled break leaves both loops, so the warning is raised at most once.
                            break loops;
                        }
                    }
                }
            }
        }
    }

    .actor-bindings {
        macs-id.out-data->ceas-report.in-data
        macs-id.out-data->conservation_plot-id.in-data
        macs-id.out-data->seqpos-id.in-data
        macs-id.out-data->peak2gene-id.in-data
        peak2gene-id.out-data->conduct-go-id.in-data
        peak2gene-id.out-data->write-annotations.in-annotations
        peak2gene-id.out-data->write-annotations-1.in-annotations
        get-file-list.out-url->macs-id.in-data
    }

    get-file-list.url->macs-id.in-data._treatment-ann
    macs-id.wiggle-treat->ceas-report.in-data.enrichment-signal
    macs-id.peak-regions->ceas-report.in-data.peak-regions
    macs-id.peak-summits->conservation_plot-id.in-data.cp_treat-ann
    macs-id.peak-summits->seqpos-id.in-data.cp_treat-ann
    macs-id.peak-summits->peak2gene-id.in-data._treat-ann
    peak2gene-id.gene-annotation-url->conduct-go-id.in-data.in-ann
    peak2gene-id.gene-annotation->write-annotations.in-annotations.annotations
    peak2gene-id.peak-annotation->write-annotations-1.in-annotations.annotations

    .meta {
        parameter-aliases {
            macs-id.output-dir {
                alias:out_macs;
            }
            macs-id.tag-size {
                alias:tag_size;
            }
            ceas-report.anns-file {
                alias:out_ceas_ann;
            }
            ceas-report.image-file {
                alias:out_ceas_report;
            }
            conservation_plot-id.output-file {
                alias:out_plot;
            }
            seqpos-id.output-dir {
                alias:out_seqpos;
            }
            conduct-go-id.output-dir {
                alias:out_conduct_go;
            }
            write-annotations.url-out {
                alias:out_peak2gene_genes;
            }
            write-annotations-1.url-out {
                alias:out_peak2gene_peaks;
            }
            get-file-list.url-in {
                alias:in;
            }
        }
        visual {
            scale:90;
            ceas-report {
                pos:"-1062 -564";
                style:ext;
                bg-color-ext:"128 128 0 64";
                bounds:"-30 -30 148.25 84";
in-data.angle:18.6279; } conduct-go-id { pos:"-1061 -338"; style:ext; bg-color-ext:"128 0 0 64"; in-data.angle:19.6538; } conservation_plot-id { pos:"-465.333 -788.889"; style:ext; bg-color-ext:"128 128 0 64"; in-data.angle:242.13; } get-file-list { pos:"-1055.56 -788.889"; style:ext; bg-color-ext:"24 102 175 64"; bounds:"-30 -30 102.292 68.5556"; out-url.angle:360; } macs-id { pos:"-822 -789"; style:ext; bg-color-ext:"0 128 0 64"; in-data.angle:180; out-data.angle:322.481; } peak2gene-id { pos:"-822 -564"; style:ext; bg-color-ext:"128 128 0 64"; in-data.angle:19.2339; out-data.angle:289.204; } seqpos-id { pos:"-462 -594"; style:ext; bg-color-ext:"128 128 0 64"; in-data.angle:180; } write-annotations { pos:"-652 -308"; style:ext; bg-color-ext:"64 26 96 64"; in-annotations.angle:42.0492; } write-annotations-1 { pos:"-854 -306"; style:ext; bg-color-ext:"64 26 96 64"; in-annotations.angle:25.7693; } get-file-list.out-url->macs-id.in-data { text-pos:"-42.3047 -28"; } macs-id.out-data->ceas-report.in-data { text-pos:"-87 -18"; } macs-id.out-data->conservation_plot-id.in-data { text-pos:"-34 -36"; } macs-id.out-data->peak2gene-id.in-data { text-pos:"-54 -11"; } macs-id.out-data->seqpos-id.in-data { text-pos:"-27 -16"; } peak2gene-id.out-data->conduct-go-id.in-data { text-pos:"-59 -24"; } peak2gene-id.out-data->write-annotations-1.in-annotations { text-pos:"-38 -8"; } peak2gene-id.out-data->write-annotations.in-annotations { text-pos:"3 -24"; } } estimations { function cistromeTime(tagsSize, dbCount) { return 240*tagsSize*dbCount; } var db = new String(utils.attributeValue("seqpos-id.motif_db")); var dbCount = db.split(',').length; var sets = utils.attributeValue("get-file-list.url-in"); var time = 0; var ram = 0; for (var i=0; i