#!/usr/bin/env python3
"""CLU2IG - convert interleaved Clustal V .aln output to sequential .ig format.

The output is suitable for use with MASE.

Brian Fristensky, August 3, 1992

Usage: cul2ig.py clustalfile > masefile

For every sequence in the alignment the output contains a dummy comment
line (";"), the sequence name, and then every chunk of that sequence in the
order the chunks appear in the interleaved input, one chunk per line.
Sequences are written in the order in which their names first appear.
"""
import sys

# Column layout of a sequence line: the name occupies the first 16 columns
# and the residues are read from the columns after it, up to column 80.
NAME_WIDTH = 16
LINE_WIDTH = 80


def read_alignment(lines):
    """Collect the sequence chunks of an interleaved Clustal V alignment.

    Args:
        lines: iterable of text lines (with or without trailing newlines).

    Returns:
        A dict mapping each sequence name to the list of its sequence
        chunks, in input order.  Dict insertion order is the order in which
        the names first appear.
    """
    sequences = {}
    for line in lines:
        # Skip blank lines, the "CLUSTAL V" title line and the consensus
        # lines marked with '*'.
        if not line.strip() or "CLUSTAL V" in line or "*" in line:
            continue
        name = line[:NAME_WIDTH].rstrip()
        if not name:
            # A line with an empty name field carries no sequence (e.g. a
            # consensus line that contains only '.' markers).
            continue
        chunk = line[NAME_WIDTH:LINE_WIDTH].rstrip()
        chunks = sequences.setdefault(name, [])
        if chunk:
            chunks.append(chunk)
    return sequences


def format_ig(sequences):
    """Render sequences as sequential .ig text.

    Args:
        sequences: mapping of name -> list of sequence chunks, as returned
            by read_alignment().

    Returns:
        The output text; every line is newline-terminated.  An empty
        mapping yields an empty string.
    """
    out = []
    for name, chunks in sequences.items():
        out.append(";")  # dummy comment line
        out.append(name)
        out.extend(chunks)
    return "".join(f"{text}\n" for text in out)


def main(argv=None):
    """Run the converter; return the process exit status.

    Args:
        argv: argument vector including the program name; defaults to
            sys.argv.
    """
    if argv is None:
        argv = sys.argv
    # Ensure that there are enough command line arguments to parse.
    if len(argv) < 2:
        # Standard output is normally redirected into the masefile, so the
        # usage message goes to standard error.
        print("Usage: cul2ig.py clustalfile > masefile", file=sys.stderr)
        return 1
    with open(argv[1]) as h_file:
        sequences = read_alignment(h_file)
    sys.stdout.write(format_ig(sequences))
    return 0


if __name__ == "__main__":
    sys.exit(main())