# The New Way
# ===========
# This next bit of code use Bio.SeqIO to parse a FASTA file

from Bio import SeqIO

def extract_organisms(file_to_parse, format):
    all_species = []
    for cur_record in SeqIO.parse(open(file_to_parse), format) :
        # extract the info from the description
        new_species = cur_record.description.split()[1]

        # append the new species to the list if it isn't there
        if new_species not in all_species:
            all_species.append(new_species)

    return all_species

if __name__ == "__main__":
    print "Using Bio.SeqIO on a FASTA file"
    all_species = extract_organisms("ls_orchid.fasta", "fasta")
    print "number of species:", len(all_species)
    print 'species names:', all_species


# The Old Way
# ===========
# This next bit of code still works fine, it uses Bio.Fasta instead

from Bio import Fasta

def extract_organisms(file_to_parse):
    # set up the parser and iterator
    parser = Fasta.RecordParser()
    file = open(file_to_parse, 'r')
    iterator = Fasta.Iterator(file, parser)

    all_species = []

    while 1:
        cur_record = iterator.next()

        if cur_record is None:
            break
        
        # extract the info from the title
        new_species = cur_record.title.split()[1]

        # append the new species to the list if it isn't there
        if new_species not in all_species:
            all_species.append(new_species)

    return all_species

if __name__ == "__main__":
    print "Using Bio.Fasta"
    all_species = extract_organisms("ls_orchid.fasta")
    print "number of species:", len(all_species)
    print 'species names:', all_species