# This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. # from __future__ import print_function import sys import os from reportlab.lib import colors from reportlab.lib.units import cm from Bio.Graphics.GenomeDiagram import Diagram, CrossLink from Bio.SeqFeature import SeqFeature, FeatureLocation from Bio import SeqIO # Modify this line to point at the Artemis/ACT example data which is online at: # https://github.com/sanger-pathogens/Artemis/tree/master/etc input_folder = "/Applications/Artemis/Artemis.app/Contents/artemis/etc" name = "af063097_v_b132222" file_a = "af063097.embl" file_b = "b132222.embl" format_a = "embl" format_b = "embl" file_a_vs_b = "af063097_v_b132222.crunch" for f in [file_a, file_b, file_a_vs_b]: if not os.path.isfile(os.path.join(input_folder, f)): print("Missing input file %s.fna" % f) sys.exit(1) # Only doing a_vs_b here, could also have b_vs_c and c_vs_d etc genomes = [ (os.path.join(input_folder, file_a), format_a), (os.path.join(input_folder, file_b), format_b), ] comparisons = [os.path.join(input_folder, file_a_vs_b)] # Create diagram with tracks, each with a feature set assert len(genomes) >= 2 and len(genomes) == len(comparisons) + 1 gd_diagram = Diagram(name, track_size=0.35, circular=False) tracks = dict() feature_sets = dict() records = dict() for f, format in genomes: records[f] = SeqIO.read(f, format) tracks[f] = gd_diagram.new_track(1, name=f, start=0, end=len(records[f]), scale_smalltick_interval=1000, scale_largetick_interval=10000, greytrack=True, greytrack_labels=0) feature_sets[f] = tracks[f].new_set() print("Drawing matches...") for i, crunch_file in enumerate(comparisons): q = genomes[i + 1][0] # query file s = genomes[i][0] # subject file q_set = feature_sets[q] s_set = feature_sets[s] with open(crunch_file) as handle: for line in handle: if line[0] == "#": continue parts = line.rstrip("\n").split(None, 7) # 0 = score # 1 = id # 2 = S1 # 3 = E1 # 4 = seq1 # 5 = S2 # 6 = E2 # 7 = seq2 try: q_start, q_end = int(parts[2]), int(parts[3]) s_start, s_end = int(parts[5]), int(parts[6]) except IndexError: sys.stderr.write(repr(line) + "\n") sys.stderr.write(repr(parts) + "\n") raise flip = False if q_start > q_end: flip = not flip q_start, q_end = q_end, q_start if s_start > s_end: flip = not flip s_start, s_end = s_end, s_start if flip: c = colors.Color(0, 0, 1, alpha=0.25) b = False else: c = colors.Color(1, 0, 0, alpha=0.25) b = False q_feature = q_set.add_feature(SeqFeature(FeatureLocation(q_start - 1, q_end)), color=c, border=b) s_feature = s_set.add_feature(SeqFeature(FeatureLocation(s_start - 1, s_end)), color=c, border=b) gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b)) # NOTE: We are using the same colour for all the matches, # with transparency. This means overlayed matches will appear darker. # It also means the drawing order not very important. # Note ACT puts long hits at the back, and colours by hit score print("Drawing CDS features...") for f, format in genomes: record = records[f] feature_set = feature_sets[f] # Mark the CDS features for cds in record.features: if cds.type != "CDS": continue feature_set.add_feature(cds, sigil="ARROW", color=colors.lightblue, border=colors.blue) gd_diagram.draw(format="linear", fragments=3, orientation="landscape", pagesize=(20 * cm, 10 * cm)) gd_diagram.write(name + ".pdf", "PDF") gd_diagram.draw(format="circular", orientation="landscape", pagesize=(20 * cm, 20 * cm)) gd_diagram.write(name + "_c.pdf", "PDF")