# Note: "SO: undefined" needs to be checked for a later SO definition # Updated to EMBL/GenBank/DDBJ FTv5.0 by Peter Rice 17th Jan 2003 # Updated to EMBL/GenBank/DDBJ FTv6.2 by Peter Rice 27th Jan 2005 # New in 6.2 is alternate mandatory qualifiers, shown by m rather than M # where e.g. /citation or /compare is used for a conflict # Updated to EMBL/GenBank/DDBJ FTv6.5 by Peter Rice 30th May 2006 # Updated to EMBL/GenBank/DDBJ FTv8.0 by Peter Rice 12th March 2008 # "-" is a placeholder for no key; should be used when the need is # merely to mark region in order to comment on it or to use it in # another feature's location; # Features in EMBL feature table with SO aliases # ============================================== # SO: A biological feature that can be # attributed to a region of biological sequence. misc_feature SO:0000110 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /number /old_locus_tag /phenotype /product /pseudo /standard_name # 1) region of DNA at which regulation of termination of transcription # occurs, which controls the expression of some bacterial operons; 2) # sequence segment located between the promoter and the first # structural gene that causes partial termination of transcription # SO: A sequence segment located between the promoter and a structural # gene that causes partial termination of transcription. 
attenuator SO:0000140 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /phenotype # constant region of immunoglobulin light and heavy chains, and T-cell # receptor alpha, beta, and gamma chains; includes one or more exons # depending on the particular chain #org=prokaryotes #mol=DNA C_region SO:0000478 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # CAAT box; part of a conserved sequence located about 75 bp up-stream # of the start point of eukaryotic transcription units which may be # involved in RNA polymerase binding; # SO: consensus=GG(C or T)CAATCT [1,2]. Part of a conserved sequence # located about 75-bp upstream of the start point of eukaryotic # transcription units which may be involved in RNA polymerase binding; # consensus=GG(C|T)CAATCT. #up=CDS #org=eukaryotes CAAT_signal SO:0000172 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # coding sequence; sequence of nucleotides that corresponds with the # sequence of amino acids in a protein (location includes stop codon); # feature includes amino acid conceptual translation. # SO: A contiguous sequence which begins with, and includes, a start # codon and ends with, and includes, a stop codon. 
#org=eukaryotes,eukaryotic_viruses #mol=DNA CDS SO:0000316 # no mandatory /translation or /pseudo in FTv 6.5 /allele /citation /codon /codon_start /db_xref /EC_number /exception /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /number /old_locus_tag /operon /product /protein_id /pseudo /ribosomal_slippage /standard_name /translation /transl_except /transl_table /trans_splicing # independent determinations of the "same" sequence differ at this site # or region; # # Or /compare=[accession-number.sequence-version] conflict SO:0001085 m/citation m/compare /allele # New in 8.0 /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # use /replace="" for a deletion /replace # displacement loop; a region within mitochondrial DNA in which a # short stretch of RNA is paired with one strand of DNA, displacing # the original partner DNA strand in this region; also used to # describe the displacement of a region of one strand of duplex DNA by # a single stranded invader in the reaction catalyzed by RecA protein # SO: Displacement loop; a region within mitochondrial DNA in which a # short stretch of RNA is paired with one strand of DNA, displacing # the original partner DNA strand in this region; also used to # describe the displacement of a region of one strand of duplex DNA by # a single stranded invader in the reaction catalyzed by RecA protein. D-loop SO:0000297 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # Diversity segment of immunoglobulin heavy chain, and T-cell receptor # beta chain; # SO: germline genomic DNA including D-region with 5' UTR and 3' UTR, also # designated as D-segment. 
#mol=DNA D_segment SO:0000458 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # a cis-acting sequence that increases the utilization of (some) # eukaryotic promoters, and can function in either orientation and in # any location (upstream or downstream) relative to the promoter; # SO: A cis-acting sequence that increases the utilization of (some) # eukaryotic promoters, and can function in either orientation and in # any location (upstream or downstream) relative to the promoter. #up=CDS #org=eukaryotes enhancer SO:0000165 /allele /bound_moiety /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name # region of genome that codes for portion of spliced mRNA, rRNA and # tRNA; may contain 5'UTR, all CDSs and 3' UTR; # SO: A region of the genome that codes for portion of spliced # messenger RNA (SO:0000234); may contain 5'-untranslated region # (SO:0000204), all open reading frames (SO:0000236) and # 3'-untranslated region (SO:0000205). #org=eukaryotes,eukaryotic_viruses exon SO:0000147 /allele /citation /db_xref /EC_number /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /number /old_locus_tag /product /pseudo /standard_name # gap in the sequence # SO: A gap in the sequence of known length. The unknown bases are # filled in with N's. # New in 6.2 gap SO:0000730 M/estimated_length /experiment /inference /map /note # GC box; a conserved GC-rich region located upstream of the start # point of eukaryotic transcription units which may occur in multiple # copies or in either orientation; consensus=GGGCGG; # SO: A conserved GC-rich region located upstream of the start point # of eukaryotic transcription units which may occur in multiple copies # or in either orientation; consensus=GGGCGG. 
GC_signal SO:0000173 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # region of biological interest identified as a gene and for which a # name has been assigned; # SO: A locatable region of genomic sequence, corresponding to a unit # of inheritance, which is associated with regulatory regions, # transcribed regions and/or other functional sequence regions #org=eukaryotes,eukaryotic_viruses gene SO:0000704 /allele /citation /db_xref /experiment /function # gene added as genbank has several entries /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /product /pseudo /phenotype /standard_name /trans_splicing # DNA which is eliminated through any of several kinds of recombination; # SO: Genomic sequence removed from the genome, as a normal event, by # a process of recombination. iDNA SO:0000723 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /number /old_locus_tag /standard_name # a segment of DNA that is transcribed, but removed from within the # transcript by splicing together the sequences (exons) on either side # of it; # SO: A segment of DNA that is transcribed, but removed from within # the transcript by splicing together the sequences (exons) on either # side of it. 
#mol=DNA intron SO:0000188 /allele /citation # /cons_splice is used only when one of the intron's splice # sites does not match the GT...AG consensus /cons_splice /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /number /old_locus_tag /pseudo /standard_name # joining segment of immunoglobulin light and heavy chains, and T-cell # receptor alpha, beta, and gamma chains; J_segment SO:0000470 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # long terminal repeat, a sequence directly repeated at both ends of a # defined sequence, of the sort typically found in retroviruses; # SO: A sequence directly repeated at both ends of a defined sequence, # of the sort typically found in retroviruses. #up=CDS #org=eukaryotes LTR SO:0000286 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name # mature peptide or protein coding sequence; coding sequence for the # mature or final peptide or protein product following # post-translational modification; the location does not include the # stop codon (unlike the corresponding CDS); # SO: The coding sequence for the mature or final peptide or protein # product following post-translational modification. mat_peptide SO:0000419 /allele /citation /db_xref /EC_number /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # site in nucleic acid which covalently or non-covalently binds # another moiety that cannot be described by any other binding key # (primer_bind or protein_bind); # SO: A region on the surface of a molecule that may interact with # another molecule. 
misc_binding SO:0000409 M/bound_moiety /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # feature sequence is different from that presented in the entry and # cannot be described by any other Difference key (conflict, unsure, # old_sequence, variation, or modified_base); # SO: A region where the sequence differs from that of a specified # sequence. misc_difference SO:0000413 /allele /citation /clone /compare /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /phenotype /replace /standard_name # site of any generalized, site-specific or replicative recombination # event where there is a breakage and reunion of duplex DNA that # cannot be described by other recombination keys or qualifiers of # source key (/insertion_seq, /transposon, /proviral); # SO: undefined general term that includes iDNA # sequence_rearrangement_feature haplotype_block and # recombination_hotspot misc_recomb SO:0000298 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # No longer mandatory /organism from FTv5.0 # If omitted, assume only the SOURCE organism is involved /organism /standard_name # any transcript or RNA product that cannot be defined by other RNA # keys (prim_transcript, precursor_RNA, mRNA, 5'clip, 3'clip, 5'UTR, # 3'UTR, exon, CDS, sig_peptide, transit_peptide, mat_peptide, intron, # polyA_site, rRNA, tRNA, scRNA, and snRNA); # SO: An RNA synthesized on a DNA or RNA template by an RNA # polymerase. 
#mol=DNA misc_RNA SO:0000673 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /product /pseudo /standard_name /trans_splicing # any region containing a signal controlling or altering gene function # or expression that cannot be described by other signal keys # (promoter, CAAT_signal, TATA_signal, -35_signal, -10_signal, # GC_signal, RBS, polyA_signal, enhancer, attenuator, terminator, and # rep_origin). # SO: A DNA sequence that controls the expression of a gene. misc_signal SO:0005836 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /phenotype /standard_name # any secondary or tertiary nucleotide structure or conformation that # cannot be described by other Structure keys (stem_loop and D-loop); # SO: A folded sequence. misc_structure SO:0000002 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name # the indicated nucleotide is a modified nucleotide and should be # substituted for by the indicated molecule (given in the mod_base # qualifier value) # SO: A modified nucleotide, i.e. a nucleotide other than A, T, C, G # or (in RNA) U. modified_base SO:0000305 M/mod_base /allele /citation /db_xref /experiment /frequency /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # messenger RNA; includes 5'untranslated region (5'UTR), coding # sequences (CDS, exon) and 3'untranslated region (3'UTR); # SO: messenger RNA is the intermediate molecule between DNA and # protein. It includes UTR and coding sequences. It does not contain # introns. 
mRNA SO:0000234 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /product /pseudo /standard_name /trans_splicing # An RNA transcript that does not encode for a protein rather the RNA # molecule is the gene product # A ncRNA is a processed_transcript, so it may not contain parts such # as transcribed_spacer_regions that are removed in the act of # processing. # the ncRNA feature is not used for ribosomal and transfer # RNA annotation, for which the rRNA and tRNA feature keys # should be used, respectively; # New in FT 7.0 ncRNA SO:0000655 m/ncRNA_class /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /product /pseudo /standard_name /trans_splicing # extra nucleotides inserted between rearranged immunoglobulin # segments. N_region SO:0000563 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # the presented sequence revises a previous version of the sequence at # this location; # # Or /compare=[accession-number.sequence-version] #up=CDS #org=eukaryotes old_sequence SO:0001082 m/citation m/compare /allele /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag #use /replace="" to annotate a deletion /replace # region containing polycistronic transcript containing genes that # encode enzymes that are in the same metabolic pathway and regulatory # sequences # SO: A group of contiguous genes transcribed as a single # (polycistronic) mRNA from a single regulatory region. 
# New in FT 6.2 operon SO:0000178 M/operon /allele /citation /db_xref /experiment /function /inference /label /map /note /phenotype /pseudo /standard_name # origin of transfer; region of a DNA molecule where transfer is # initiated during the process of conjugation or mobilization # SO: A region of a DNA molecule where transfer is initiated during the # process of conjugation or mobilization. # New in FT 6.2 oriT SO:0000724 /allele /bound_moiety /citation /db_xref # Only /direction=LEFT or RIGHT are valid /direction /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /rpt_family /rpt_type /rpt_unit_range /rpt_unit_seq /standard_name # recognition region necessary for endonuclease cleavage of an RNA # transcript that is followed by polyadenylation; consensus=AATAAA # [1]; # SO: The recognition sequence necessary for endonuclease cleavage of # an RNA transcript that is followed by polyadenylation; # consensus=AATAAA. #mol=DNA polyA_signal SO:0000551 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # site on an RNA transcript to which will be added adenine residues by # post-transcriptional polyadenylation; # SO: The site on an RNA transcript to which will be added adenine # residues by post-transcriptional polyadenylation. 
#org=eukaryotes,eukaryotic_viruses polyA_site SO:0000553 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # any RNA species that is not yet the mature RNA product; may include # 5' clipped region (5'clip), 5' untranslated region (5'UTR), coding # sequences (CDS, exon), intervening sequences (intron), 3' # untranslated region (3'UTR), and 3' clipped region (3'clip); # SO: The primary (initial, unprocessed) transcript; includes # five_prime_clip (SO:0000555), five_prime_untranslated_region # (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) # and three_prime_ untranslated_region (three_prime_UTR), and # three_prime_clip (SO:0000557). precursor_RNA SO:0000185 /allele # /citation /db_xref /evidence /function /gene /label /locus_tag /map # /note /old_locus_tag /operon # removed in FTv5.0 for new entries - # use <> in location instead # /partial # added in FTv3.0 Jan 2001 # /product /standard_name /usedin #org=eukaryotes,eukaryotic_viruses precursor_RNA SO:0000835 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /product /standard_name /trans_splicing #SO: The primary (initial, unprocessed) transcript; includes # five_prime_clip (SO:0000555), five_prime_untranslated_region # (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) # and three_prime_ untranslated_region (three_prime_UTR), and # three_prime_clip (SO:0000557) prim_transcript SO:0000185 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /standard_name # non-covalent primer binding site for initiation of replication, # transcription, or reverse transcription; includes site(s) for # synthetic e.g., PCR primer elements; # SO: Non-covalent primer binding # site for initiation of replication, transcription, or reverse # transcription. 
primer_bind SO:0005850 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /PCR_conditions /standard_name # region on a DNA molecule involved in RNA polymerase binding to # initiate transcription; # SO: The region on a DNA molecule involved in RNA polymerase binding # to initiate transcription. promoter SO:0000167 /allele /bound_moiety /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /phenotype /pseudo /standard_name # non-covalent protein binding site on nucleic acid; # SO: A region of a molecule that binds to a protein. #mol=DNA # Use RBS instead for ribosome binding sites protein_bind SO:0000410 M/bound_moiety /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /standard_name # ribosome binding site; # SO: Region in mRNA where ribosome assembles RBS SO:0000139 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name # region of genome containing repeating units; # SO: A region of sequence containing one or more repeat units. repeat_region SO:0000657 SO:0000705 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /mobile_element /note /old_locus_tag /rpt_family /rpt_type /rpt_unit_range /rpt_unit_seq /satellite /standard_name # single repeat element; # SO: A single repeat element. 
# removed in FTv8.0 #repeat_unit SO:0000726 # /allele # /citation # /db_xref # /experiment # /function # /gene # /gene_synonym # /inference # /label # /locus_tag # /map # /note # /old_locus_tag # /rpt_family # /rpt_type # /rpt_unit_range # /rpt_unit_seq # SO: regulatory_region, equivalent to EMBL/GB misc_signal rep_origin SO:0000296 /allele /citation /db_xref /direction /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name # mature ribosomal RNA ; RNA component of the ribonucleoprotein # particle (ribosome) which assembles amino acids into proteins. # SO: RNA that comprises part of a ribosome, and that can provide both # structural scaffolding and catalytic activity. rRNA SO:0000252 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # use /product for rRNA sizes /product /pseudo /standard_name # switch region of immunoglobulin heavy chains; involved in the # rearrangement of heavy chain DNA leading to the expression of a # different immunoglobulin class from the same B-cell; S_region SO:0000301_S_region /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # many tandem repeats (identical or related) of a short basic # repeating unit; many have a base composition or other property # different from the genome average that allows them to be separated # from the bulk (main band) genomic DNA; # SO: The many tandem repeats (identical or related) of a short basic # repeating unit; many have a base composition or other property # different from the genome average that allows them to be separated # from the bulk (main band) genomic DNA. 
#up=misc_signal #org=eukaryotes # Removed FTv8.0 #satellite SO:0000005 # /allele # /citation # /db_xref # /experiment # /gene # /gene_synonym # /inference # /label # /locus_tag # /map # /note # /old_locus_tag # /rpt_type # /rpt_family # /rpt_unit_range # /rpt_unit_seq # /standard_name # small cytoplasmic RNA; any one of several small cytoplasmic RNA # molecules present in the cytoplasm and (sometimes) nucleus of a # eukaryote; # SO: Any one of several small cytoplasmic RNA molecules present in the # cytoplasm and sometimes nucleus of a eukaryote. #mol=DNA # removed FTv8.0 #scRNA SO:0000013 # /allele # /citation # /db_xref # /experiment # /function # /gene # /gene_synonym # /inference # /label # /locus_tag # /map # /note # /old_locus_tag # /product # /pseudo # /standard_name # signal peptide coding sequence; coding sequence for an N-terminal # domain of a secreted protein; this domain is involved in attaching # nascent polypeptide to the membrane leader sequence; # SO: The sequence for an N-terminal domain of a secreted protein; # this domain is involved in attaching nascent polypeptide to the # membrane leader sequence. sig_peptide SO:0000418 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # small nucleolar RNA molecules mostly involved in rRNA modification # and processing; # SO: Small nucleolar RNAs (snoRNAs) are involved in the processing # and modification of rRNA in the nucleolus. There are two main # classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 # snoRNA is a member of the box C/D class. Indeed, the box C/D element # is a subset of the six short sequence elements found in all U3 # snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA # secondary structure is characterised by a small 5' domain (with # boxes A and A'), and a larger 3' domain (with boxes B, C, C', and # D), the two domains being linked by a single-stranded hinge. 
Boxes B # and C form the B/C motif, which appears to be exclusive to U3 # snoRNAs, and boxes C' and D form the C'/D motif. The latter is # functionally similar to the C/D motifs found in other snoRNAs. The # 5' domain and the hinge region act as a pre-rRNA-binding domain. The # 3' domain has conserved protein-binding sites. Both the box B/C and # box C'/D motifs are sufficient for nuclear retention of U3 # snoRNA. The box C'/D motif is also necessary for nucleolar # localization, stability and hypermethylation of U3 snoRNA. Both box # B/C and C'/D motifs are involved in specific protein interactions # and are necessary for the rRNA processing functions of U3 snoRNA. # replaced by ncrna FTv7.0 #snoRNA SO:0000275 # /allele # /citation # /db_xref # /experiment # /function # /gene # /gene_synonym # /inference # /label # /locus_tag # /map # /note # /old_locus_tag # /product # /pseudo # /standard_name # small nuclear RNA molecules involved in pre-mRNA splicing and # processing # SO: Small non-coding RNA in the nucleoplasm. # replaced by ncrna FTv7.0 #snRNA SO:0000274 # /allele # /citation # /db_xref # /experiment # /function # /gene # /gene_synonym # /inference # /label # /locus_tag # /map # /note # /old_locus_tag # /product # /pseudo # /standard_name #identifies the biological source of the specified span of the #sequence; this key is mandatory; more than one source key per #sequence is allowed; every entry/record will have, as a minimum, #either a single source key spanning the entire sequence or multiple #source keys which together span the entire #sequence. /mol_type="genomic DNA", "genomic RNA", "mRNA", "tRNA", #"rRNA", "snoRNA", "snRNA", "scRNA", "pre-RNA", "other RNA", "other #DNA", "unassigned DNA", "unassigned RNA" # SO: The sequence referred to by an entry in a databank such as # Genbank or SwissProt. 
source SO:2000061 M/organism # New in FTv5.0 # mandatory from 01-July-2003 M/mol_type /bio_material /cell_line /cell_type /chromosome /citation /clone /clone_lib /collected_by /collection_date /country /cultivar /culture_collection # new in FTv7.0 /db_xref /dev_stage /ecotype # /environmental_sample and /isolation_source should appear together /environmental_sample /focus /frequency /germline /haplotype /host # new in FTv8.0 /identified_by /isolate # /environmental_sample and /isolation_source should appear together /isolation_source /lab_host /label /lat_lon /macronuclear /map /mating_type # New in FTv8.0 /note /organelle /PCR_primers /plasmid /pop_variant /proviral /rearranged /segment /serotype /serovar /sex /specimen_voucher # /specific_host # removed in FTv8.0 /strain /sub_clone /sub_species /sub_strain /tissue_lib /tissue_type # transgenic is supposed to be only in extra source keys for an entry /transgenic /variety # /virion # removed in FTv8.0 # hairpin; a double-helical region formed by base-pairing between # adjacent (inverted) complementary sequences in a single strand of # RNA or DNA. # SO: A double-helical region of nucleic acid formed by base-pairing between # adjacent (inverted) complementary sequences. stem_loop SO:0000313 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /standard_name # short, single-copy DNA sequence that characterizes a mapping # landmark on the genome and can be detected by PCR; a region of the # genome can be mapped by determining the order of a series of STSs; # SO: Short (typically a few hundred base pairs) DNA sequence that has # a single occurrence in a genome and whose location and base sequence # are known. 
STS SO:0000331 # Location to include the primer(s) annotated usually in primer_bind feature /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name # TATA box; Goldberg-Hogness box; a conserved AT-rich septamer found # about 25 bp before the start point of each eukaryotic RNA polymerase # II transcript unit which may be involved in positioning the enzyme # for correct initiation; consensus=TATA(A or T)A(A or T) [1,2]; # SO: A conserved AT-rich septamer found about 25-bp before the start # point of many eukaryotic RNA polymerase II transcript units; may be # involved in positioning the enzyme for correct initiation; # consensus=TATA(A|T)A(A|T). #mol=DNA #up=misc_binding TATA_signal SO:0000174 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag # sequence of DNA located either at the end of the transcript that # causes RNA polymerase to terminate transcription; # SO: The sequence of DNA located either at the end of the transcript that # causes RNA polymerase to terminate transcription. 
#org=eukaryotes,eukaryotic_viruses #mol=DNA terminator SO:0000141 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /standard_name # New in FTv7.0 # transfer messenger RNA; tmRNA acts as a tRNA first, and then as an # mRNA that encodes a peptide tag; the ribosome translates this mRNA # region of tmRNA and attaches the encoded peptide tag to the # C-terminus of the unfinished protein; this attached tag targets the # protein for destruction or proteolysis; tmRNA SO:0000584 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name /tag_peptide # transit peptide coding sequence; coding sequence for an N-terminal # domain of a nuclear-encoded organellar protein; this domain is # involved in post-translational import of the protein into the # organelle; # SO: The coding sequence for an N-terminal domain of a # nuclear-encoded organellar protein: this domain is involved in post # translational import of the protein into the organelle. #mol=DNA transit_peptide SO:0000725 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # mature transfer RNA, a small RNA molecule (75-85 bases long) that # mediates the translation of a nucleic acid sequence into an amino # acid sequence; # SO: Transfer RNA (tRNA) molecules are approximately 80 nucleotides # in length. Their secondary structure includes four short # double-helical elements and three loops (D, anti-codon, and T # loops). Further hydrogen bonds mediate the characteristic L-shaped # molecular structure. tRNAs have two regions of fundamental # functional importance: the anti-codon, which is responsible for # specific mRNA codon recognition, and the 3' end, to which the tRNA's # corresponding amino acid is attached (by aminoacyl-tRNA # synthetases). 
tRNAs cope with the degeneracy of the genetic code in # two manners: having more than one tRNA (with a specific anti-codon) # for a particular amino acid; and 'wobble' base-pairing, # i.e. permitting non-standard base-pairing at the 3rd anti-codon # position. tRNA SO:0000253 /allele /anticodon /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name /trans_splicing # New in FTv6.5 # author is unsure of exact sequence in this region; unsure SO:0001086 /allele /citation /compare /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /replace # variable region of immunoglobulin light and heavy chains, and T-cell # receptor alpha, beta, and gamma chains; codes for the variable amino # terminal portion; can be composed of V_segments, D_segments, # N_regions, and J_segments; V_region SO:0000938 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # variable segment of immunoglobulin light and heavy chains, and # T-cell receptor alpha, beta, and gamma chains; codes for most of the # variable region (V_region) and the last few amino acids of the # leader peptide; #up=CDS #org=eukaryotes V_segment SO:0000466 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /product /pseudo /standard_name # a related strain contains stable mutations from the same gene (e.g., # RFLPs, polymorphisms, etc.) which differ from the presented sequence # at this location (and possibly others); # SO: A region of sequence where variation has been observed. 
#up=CDS #org=eukaryotes # was variation SO:0000109 but this is now obsoleted in SO variation SO:0001060 SO:0000109 /allele /citation /compare /db_xref /experiment /frequency /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /phenotype /product # Use /replace="" for deletion /replace /standard_name # 3'-most region of a precursor transcript that is clipped off during # processing; # SO: 3'-most region of a precursor transcript that is clipped off # during processing. # Removed in FTv7.0 #3'clip SO:0000557 # /allele # /citation # /db_xref # /experiment # /function # /gene # /gene_synonym # /inference # /label # /locus_tag # /map # /note # /old_locus_tag # /standard_name # region at the 3' end of a mature transcript (following the stop # codon) that is not translated into a protein; # SO: A region at the 3' end of a mature transcript (following the # stop codon) that is not translated into a protein. 3'UTR SO:0000205 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name /trans_splicing # 5'-most region of a precursor transcript that is clipped off during # processing; # SO: 5' most region of a precursor transcript that is clipped off # during processing. # Removed in FTv7.0 #5'clip SO:0000555 # /allele # /citation # /db_xref # /experiment # /function # /gene # /gene_synonym # /inference # /label # /locus_tag # /map # /note # /old_locus_tag # /standard_name # region at the 5' end of a mature transcript (preceding the # initiation codon) that is not translated into a protein; # SO: A region at the 5' end of a mature transcript (preceding the # initiation codon) that is not translated into a protein. 
5'UTR SO:0000204 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /standard_name /trans_splicing # Pribnow box; a conserved region about 10 bp upstream of the start # point of bacterial transcription units which may be involved in # binding RNA polymerase; consensus=TAtAaT [1,2,3,4]; # SO: A conserved region about 10-bp upstream of the start point of # bacterial transcription units which may be involved in binding RNA # polymerase; consensus=TAtAaT. -10_signal SO:0000175 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /standard_name # a conserved hexamer about 35 bp upstream of the start point of # bacterial transcription units; consensus=TTGACa or TGTTGACA; # SO: A conserved hexamer about 35-bp upstream of the start point of # bacterial transcription units; consensus=TTGACa or TGTTGACA. #org=prokaryotes #mol=DNA -35_signal SO:0000176 /allele /citation /db_xref /experiment /gene /gene_synonym /inference /label /locus_tag /map /note /old_locus_tag /operon /standard_name # "-" is a placeholder for no key; should be used when the need is # merely to mark region in order to comment on it or to use it in # another feature's location; # Otherwise, use misc_feature # SO: A biological feature that can be # attributed to a region of biological sequence. #org=prokaryotes #mol=DNA - SO:0000001 /allele /citation /db_xref /experiment /function /gene /gene_synonym /inference /label /locus_tag /map /note /number /old_locus_tag /phenotype /product /pseudo /standard_name