# SOFA ontology terms acceptable as feature types in GFF3 # Automatically extracted from sofa.definition 8 June 2006 # SO:ke # A biological feature that can be attributed to a region of biological # sequence. sequence_feature SO:0000110 # SO:ke # The reverse complement of the primary transcript. antisense_primary_transcript SO:0000645 # SO:ke # Antisense RNA is RNA that is transcribed from the coding, rather than # the template, strand of DNA. It is therefore complementary to mRNA. antisense_RNA SO:0000644 # SO:ma # A sequence that can autonomously replicate, as a plasmid, when # transformed into a bacterial host. ARS SO:0000436 # SO:ma # A sequence of nucleotides that has been algorithmically derived from an # alignment of two or more different sequences. assembly SO:0000353 # SO:ke # A region of sequence which may be used to manufacture a longer # assembled, sequence. assembly_component SO:0000143 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A sequence segment located between the promoter and a structural gene # that causes partial termination of transcription. attenuator SO:0000140 # SO:ke # A self spliced intron. autocatalytically_spliced_intron SO:0000588 # SO:ke # A region on the surface of a molecule that may interact with another # molecule. binding_site SO:0000409 # SO:ke # A pyrimidine rich sequence near the 3' end of an intron to which the # 5'end becomes covalently bound during nuclear splicing. The resulting # structure resembles a lariat. branch_site SO:0000611 # Genomic DNA of immunoglobulin/T-cell receptor gene including C-region # (and introns if present) with 5' UTR (SO:0000204) and 3' UTR # (SO:0000205). http://imgt.cines.fr/ligmdb/LIGMlect?query=7 C_gene SO:0000478 # http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html # A structure consisting of a 7-methylguanosine in 5'-5' triphosphate # linkage with the first nucleotide of an mRNA. 
It is added # post-transcriptionally, and is not encoded in the DNA. cap SO:0000581 # SO:ke # A match against cDNA sequence. cDNA_match SO:0000689 # SO:ma # A contiguous sequence which begins with, and includes, a start codon and # ends with, and includes, a stop codon. CDS SO:0000316 # SO:ke # A region of chromosome where the spindle fibers attach during mitosis # and meiosis. centromere SO:0000577 # SO:ke # A part of a chromosome that has structural function. chromosomal_structural_element SO:0000628 # http://biotech.icmb.utexas.edu/search/dict-search.mhtml # Structural unit composed of long DNA molecule. chromosome SO:0000340 # SO:ke # Part of the primary transcript that is clipped off during processing. clip SO:0000303 # http://www.geospiza.com/community/support/glossary/ # A piece of DNA that has been inserted in a vector so that it can be # propagated in E. coli or some other organism. clone SO:0000151 # SO:ke # The end of the clone insert. clone_end SO:0000103 # SO:ke # The start of the clone insert. clone_start SO:0000179 # http://genomics.phrma.org/lexicon/c.html # A set of (usually) three nucleotide bases in a DNA or RNA sequence, # which together signify a unique amino acid or the termination of # translation. codon SO:0000360 # http://www.ebi.ac.uk/mutations/recommendations/mutevent.html # When no simple or well defined DNA mutation event describes the observed # DNA change, the keyword "complex" should be used. Usually there are # multiple equally plausible explanations for the change. complex_substitution SO:1000005 # SO:ls # A contiguous sequence derived from sequence assembly. Has no gaps, but # may contain N's from unavailable bases. contig SO:0000149 # SO:rd # Regions of a few hundred to a few thousand bases in vertebrate genomes # that are relatively GC and CpG rich; they are typically unmethylated and # often found near the 5' ends of genes. CpG_island SO:0000307 # SO:ma # A nucleotide match against a sequence from another organism.
cross_genome_match SO:0000177 # Germline genomic DNA including D-region with 5' UTR and 3' UTR, also # designated as D-segment. D_gene SO:0000458 # SO:ke # The sequence referred to by an entry in a databank such as Genbank or # SwissProt. databank_entry SO:2000061 # SO:ke # A non-functional descendent of an exon. decayed_exon SO:0000464 # SO:ke # The sequence that is deleted. deletion SO:0000159 # SO:ke # The space between two bases in a sequence which marks the position where # a deletion has occurred. deletion_junction SO:0000687 # SO:ke # A repeat where the same sequence is repeated in the same direction. # Example: GCTGA-----GCTGA. direct_repeat SO:0000314 # SO:ke # A repeat that is located at dispersed sites in the genome. dispersed_repeat SO:0000658 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A cis-acting sequence that increases the utilization of (some) # eukaryotic promoters, and can function in either orientation and in any # location (upstream or downstream) relative to the promoter. enhancer SO:0000165 # SO:ma # A non-coding RNA, usually with a specific secondary structure, that acts # to regulate gene expression. enzymatic_RNA SO:0000372 # http://genomics.phrma.org/lexicon/e.html # Expressed Sequence Tag: The sequence of a single sequencing read from a # cDNA clone or PCR product; typically a few hundred base pairs long. EST SO:0000345 # SO:ke # A match against an EST sequence. EST_match SO:0000668 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A region of the genome that codes for portion of spliced messenger RNA # (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open # reading frames (SO:0000236) and 3'-untranslated region (SO:0000205). exon SO:0000147 # SO:ke # The boundary between two exons in a processed transcript. exon_junction SO:0000333 # SO:ke # A region of sequence implicated in an experimental result.
experimental_result_region SO:0000703 # SO:ke # A match to an EST or cDNA sequence. expressed_sequence_match SO:0000102 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A region at the 5' end of a mature transcript (preceding the initiation # codon) that is not translated into a protein. five_prime_UTR SO:0000204 # http://biotech.icmb.utexas.edu/search/dict-search.mhtml # The DNA sequences extending on either side of a specific locus. flanking_region SO:0000239 # SO:ke # A gap in the sequence of known length. The unknown bases are filled in # with N's. gap SO:0000730 # SO:rd # A locatable region of genomic sequence, corresponding to a unit of # inheritance, which is associated with regulatory regions, transcribed # regions and/or other functional sequence regions gene SO:0000704 # SO:ma # A collection of related genes. gene_group SO:0005855 # SO:ke # A kind of regulatory region that regulates a gene_group such as an # operon, rather than an individual gene. gene_group_regulatory_region SO:0000752 # SO:ls # A set of subregions selected from sequence contigs which when # concatenated form a nonredundant linear sequence. golden_path SO:0000688 # SO:rd # One of the pieces of sequence that make up a golden path. golden_path_fragment SO:0000468 # http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml # Group II introns are found in rRNA, tRNA and mRNA of organelles in # fungi, plants and protists, and also in mRNA in bacteria. They are large # self-splicing ribozymes and have 6 structural domains (usually # designated dI to dVI). A subset of group II introns also encode # essential splicing proteins in intronic ORFs. The length of these # introns can therefore be up to 3kb. Splicing occurs in almost identical # fashion to nuclear pre-mRNA splicing with two transesterification steps.
# The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice # site, followed by nucleophilic attack on the 3' splice site by the 3' OH # of the upstream exon. Protein machinery is required for splicing in # vivo, and long range intron-intron and intron-exon interactions are # important for splice site positioning. Group II introns are further # sub-classified into groups IIA and IIB which differ in splice site # consensus, distance of bulged A from 3' splice site, some tertiary # interactions, and intronic ORF phylogeny. group_II_intron SO:0000603 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028 # Group I catalytic introns are large self-splicing ribozymes. They # catalyse their own excision from mRNA, tRNA and rRNA precursors in a # wide range of organisms. The core secondary structure consists of 9 # paired regions (P1-P9). These fold to essentially two domains, the P4-P6 # domain (formed from the stacking of P5, P4, P6 and P6a helices) and the # P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I # catalytic introns often have long ORFs inserted in loop regions. group_I_intron SO:0000587 # http://www.rna.ucla.edu/index.html # A short 3'-uridylated RNA that can form a perfect duplex (except for the # oligoU tail (SO:0000609)) with a stretch of mature edited mRNA. guide_RNA SO:0000602 # http:rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html # A small catalytic RNA motif that catalyzes self-cleavage reaction. Its # name comes from its secondary structure which resembles a carpenter's # hammer. The hammerhead ribozyme is involved in the replication of some # viroid and some satellite RNAs. hammerhead_ribozyme SO:0000380 # SO:ke # A region of sequence identified as having been inserted. insertion SO:0000667 # SO:ke # The junction where an insertion occurred. 
insertion_site SO:0000366 # http://medical.webends.com/kw/Insulator%20Elements # Nucleic acid regulatory sequences that limit or oppose the action of # ENHANCER ELEMENTS and define the boundary between differentially # regulated gene loci. insulator SO:0000627 # SO:ke # A viral sequence which has integrated into the host genome. integrated_virus SO:0000113 # SO:ke # The region between two known genes. intergenic_region SO:0000605 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A segment of DNA that is transcribed, but removed from within the # transcript by splicing together the sequences (exons) on either side of # it. intron SO:0000188 # http://www.ebi.ac.uk/mutations/recommendations/mutevent.html # A continuous nucleotide sequence is inverted in the same position. inversion SO:1000036 # SO:ke # The sequence is complementarily repeated on the opposite strand. # Example: GCTGA-----TCAGC. inverted_repeat SO:0000294 # Germline genomic DNA of an immunoglobulin/T-cell receptor gene # including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), # also designated as J-segment. J_gene SO:0000470 # Recombination signal including J-heptamer, J-spacer and J-nonamer in # 5' of J-region of a J-gene or J-sequence J_gene_recombination_feature SO:0000302 # SO:ke # A junction refers to an interbase location of zero in a sequence. junction SO:0000699 # SO:ke # A region of sequence, aligned to another sequence with some statistical # significance, using an algorithm such as BLAST or SIM4. match SO:0000343 # SO:ke # A part of a match, for example an hsp from blast isa match_part. match_part SO:0000039 # SO:ke # A collection of match parts match_set SO:0000038 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html # The coding sequence for the mature or final peptide or protein product # following post-translational modification. mature_peptide SO:0000419 # SO:ke # A methylated adenine.
methylated_A SO:0000161 # SO:ke # A nucleotide modified by methylation. methylated_base_feature SO:0000306 # SO:ke # A methylated deoxy-cytosine. methylated_C SO:0000114 # http://www.informatics.jax.org/silver/glossary.shtml # A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple # times in tandem. microsatellite SO:0000289 # http://www.rerf.or.jp/eigo/glossary/minisate.htm # A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit # is 5 - 30 base pairs). minisatellite SO:0000643 # PMID:12592000 # Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA # gene. miRNAs are produced from precursor molecules (SO:0000647) that can # form local hairpin structures, which ordinarily are processed (via the # Dicer pathway) such that a single miRNA molecule accumulates from one # arm of a hairpin precursor molecule. miRNAs may trigger the cleavage of # their target molecules or act as translational repressors. minus_10_signal SO:0000175 minus_35_signal SO:0000176 miRNA SO:0000276 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A modified nucleotide, i.e. a nucleotide other than A, T, C, G or (in # RNA) U. modified_base_site SO:0000305 # SO:ma # Messenger RNA is the intermediate molecule between DNA and protein. It # includes UTR and coding sequences. It does not contain introns. mRNA SO:0000234 # SO:ke # An mRNA sequence that does not encode for a protein rather the RNA # molecule is the gene product. ncRNA SO:0000655 # SO:ke # A primary transcript that is never translated into a protein. nc_primary_transcript SO:0000483 # SO:ke # A region of the gene which is not transcribed. non_transcribed_region SO:0000183 # SO:ma # A region of nucleotide sequence targeted by a nuclease enzyme. nuclease_sensitive_site SO:0000684 # SO:ke # A match against a nucleotide sequence. nucleotide_match SO:0000347 # SO:ke # A region of nucleotide sequence corresponding to a known motif.
nucleotide_motif SO:0000714 # SO:ma # A short oligonucleotide sequence, of length on the order of 10's of # bases; either single or double stranded. oligo SO:0000696 # SO:ma # A regulatory element of an operon to which activators or repressors bind # thereby effecting translation of genes in that operon. operator SO:0000057 # SO:ma # A group of contiguous genes transcribed as a single (polycistronic) mRNA # from a single regulatory region. operon SO:0000178 # SO:rb # The inframe interval between the stop codons of a reading frame which # when read as sequential triplets, has the potential of encoding a # sequential string of amino acids. TER(NNN)nTER ORF SO:0000236 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # The origin of replication; starting site for duplication of a nucleic # acid molecule to give two identical copies. origin_of_replication SO:0000296 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A region of a DNA molecule where transfer is initiated during the process # of conjugation or mobilization. origin_of_transfer SO:0000724 # SO:ke # A region amplified by a PCR reaction. PCR_product SO:0000006 # http://www.ebi.ac.uk/mutations/recommendations/mutevent.html # A mutation event where a single DNA nucleotide changes into another # nucleotide. point_mutation SO:1000008 # SO:ke # Sequence of about 100 nucleotides of A added to the 3' end of most # eukaryotic mRNAs. polyA_sequence SO:0000610 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # The recognition sequence necessary for endonuclease cleavage of an RNA # transcript that is followed by polyadenylation; consensus=AATAAA. polyA_signal_sequence SO:0000551 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # The site on an RNA transcript to which will be added adenine residues by # post-transcriptional polyadenylation.
polyA_site SO:0000553 # SO:ma # A sequence of amino acids linked by peptide bonds which may lack # appreciable tertiary structure and may not be liable to irreversible # denaturation. polypeptide SO:0000104 # http://nar.oupjournals.org/cgi/content/full/25/4/888 # The polypyrimidine tract is one of the cis-acting sequence elements # directing intron removal in pre-mRNA splicing. polypyrimidine_tract SO:0000612 # SO:ke # A region of sequence where there may have been an error in the assembly. possible_assembly_error SO:0000702 # SO:ke # A region of sequence where the validity of the base calling is # questionable. possible_base_call_error SO:0000701 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # The primary (initial, unprocessed) transcript; includes five_prime_clip # (SO:0000555), five_prime_untranslated_region (SO:0000204), open reading # frames (SO:0000236), introns (SO:0000188) and three_prime_ # untranslated_region (three_prime_UTR), and three_prime_clip # (SO:0000557). primary_transcript SO:0000185 primary_transcript_region SO:0000835 # http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html # A short preexisting polynucleotide chain to which new # deoxyribonucleotides can be added by DNA polymerase. primer SO:0000112 # SO:ke # A transcript which has undergone processing to remove parts such as # introns and transcribed_spacer_regions. primer_binding_site SO:0005850 processed_transcript SO:0000233 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # The region on a DNA molecule involved in RNA polymerase binding to # initiate transcription. promoter SO:0000167 # SO:ke # A primary transcript that, at least in part, encodes one or more # proteins. protein_coding_primary_transcript SO:0000120 # SO:ke # A match against a protein sequence.
protein_match SO:0000349 # http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html # A sequence that closely resembles a known functional gene, at another # locus within a genome, that is non-functional as a consequence of # (usually several) mutations that prevent either its transcription or # translation (or both). In general, pseudogenes result from either # reverse transcription of a transcript of their "normal" paralog # (SO:0000043) (in which case the pseudogene typically lacks introns and # includes a poly(A) tail) or from recombination (SO:0000044) (in which # case the pseudogene is typically a tandem duplication of its "normal" # paralog). pseudogene SO:0000336 # SO:rb # The exon of a pseudogene. pseudogenic_exon SO:0000507 # SO:cjm # A non-functional descendent of a functional entity. pseudogenic_region SO:0000462 # SO:rb # A transcript of a pseudogene pseudogenic_transcript SO:0000516 # http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284 # A small, 17-28-nt, small interfering RNA derived from transcripts # of repetitive elements. rasiRNA SO:0000454 # SO:rd # A sequence obtained from a single sequencing experiment. Typically a # read is produced when a base calling program interprets information from # a chromatogram trace file produced from a sequencing machine. read SO:0000150 # SO:rb # A nucleic acid sequence that when read as sequential triplets, has the # potential of encoding a sequential string of amino acids. It does not # contain the start or stop codon. reading_frame SO:0000717 # SO:ls # A pair of sequencing reads in which the two members of the pair are # related by originating at either end of a clone insert. read_pair SO:0000007 # SO:ke # A sequence used in experiment. reagent SO:0000695 # SO:ke # Continuous sequence. region SO:0000001 # http://www.genpromag.com/scripts/glossary.asp?LETTER=R # A DNA sequence that controls the expression of a gene.
regulatory_region SO:0005836 # ISBN:0198506732 # A group of genes, whether linked as a cluster or not, that respond to a # common regulatory signal. regulon SO:1001284 # SO:ke # A comment about the sequence. remark SO:0000700 # SO:ke # A group of characterized repeat sequences. repeat_family SO:0000187 # SO:ke # A region of sequence containing one or more repeat units. repeat_region SO:0000657 # http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119 # Any of the individual polynucleotide sequences produced by digestion of # DNA with a restriction endonuclease. restriction_fragment SO:0000412 # PMID:6247908 # A polymorphism detectable by the size differences in DNA fragments # generated by a restriction enzyme. RFLP_fragment SO:0000193 # SO:ke # Region in mRNA where ribosome assembles. ribosome_entry_site SO:0000139 # SO:ma # An RNA with catalytic activity. ribozyme SO:0000374 # SO:rd # A double stranded RNA duplex, at least 20bp long, used experimentally to # inhibit gene function by RNA interference. RNAi_reagent SO:0000337 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030 # The RNA molecule essential for the catalytic activity of RNase MRP, an # enzymatically active ribonucleoprotein with two distinct roles in # eukaryotes. In mitochondria it plays a direct role in the initiation of # mitochondrial DNA replication. In the nucleus it is involved in # precursor rRNA processing, where it cleaves the internal transcribed # spacer 1 between 18S and 5.8S rRNAs. RNase_MRP_RNA SO:0000385 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010 # The RNA component of Ribonuclease P (RNase P), a ubiquitous # endoribonuclease, found in archaea, bacteria and eukarya as well as # chloroplasts and mitochondria. Its best characterised activity is the # generation of mature 5 prime ends of tRNAs by cleaving the 5 prime # leader elements of precursor-tRNAs. Cellular RNase Ps are # ribonucleoproteins. 
RNA from bacterial RNase Ps retains its catalytic # activity in the absence of the protein subunit, i.e. it is a ribozyme. # Isolated eukaryotic and archaeal RNase P RNA has not been shown to # retain its catalytic function, but is still essential for the catalytic # activity of the holoenzyme. Although the archaeal and eukaryotic # holoenzymes have a much greater protein content than the bacterial ones, # the RNA cores from all the three lineages are homologous. Helices # corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular # RNase P RNAs. Yet, there is considerable sequence variation, # particularly among the eukaryotic RNAs. RNase_P_RNA SO:0000386 # ISBN:0198506732 # RNA that comprises part of a ribosome, and that can provide both # structural scaffolding and catalytic activity. rRNA SO:0000252 # SO:ke # 18S_rRNA -A large polynucleotide which functions as a part of the small # subunit of the ribosome rRNA_18S SO:0000407 # SO:ke # A component of the large ribosomal subunit. rRNA_28S SO:0000653 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002 # 5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of # the eukaryotic ribosome. It is transcribed by RNA polymerase I as part # of the 45S precursor that also contains 18S and 28S rRNA. Functionally, # it is thought that 5.8S rRNA may be involved in ribosome translocation. # It is also known to form covalent linkage to the p53 tumour suppressor # protein. 5.8S rRNA is also found in archaea. rRNA_5.8S SO:0000375 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001 # 5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit # in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by # RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S # precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has # been shown that fingers 4-7 of the nine-zinc finger transcription factor # TFIIIA can bind to the central region of 5S RNA. 
Thus, in addition to # positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S # rRNA until it is required for transcription. rRNA_5S SO:0000652 # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract # A short diagnostic sequence tag, serial analysis of gene expression # (SAGE), that allows the quantitative and simultaneous analysis of a # large number of transcripts. SAGE_tag SO:0000326 # http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html # Any one of several small cytoplasmic RNA molecules present in the # cytoplasm and sometimes nucleus of a eukaryote. scRNA SO:0000013 # SO:ke # A region where the sequence differs from that of a specified sequence. sequence_difference SO:0000413 sequence_conflict SO:0001085 # Describes the positions in a sequence where the authors are unsure # about the sequence assignment sequence_uncertainty SO:0001086 # SO:ke # A region of sequence where variation has been observed. # was variation SO:0000109 but this is now obsoleted in SO sequence_variant SO:0001060 SO:0000109 sequencing_information SO:0001082 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html # The sequence for an N-terminal domain of a secreted protein; this domain # is involved in attaching nascent polypeptide to the membrane leader # sequence. signal_peptide SO:0000418 # http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm#s # Combination of short DNA sequence elements which suppress the # transcription of an adjacent gene or genes. silencer SO:0000625 # PMID:12592000 # Small RNA molecule that is the product of a longer exogenous or # endogenous dsRNA, which is either a bimolecular duplex or very # long hairpin, processed (via the Dicer pathway) such that numerous siRNAs # accumulate from both strands of the dsRNA. siRNAs trigger the cleavage of # their target molecules.
siRNA SO:0000646 # SO:ma # A non-coding RNA, usually with a specific secondary structure, that acts # to regulate gene expression. small_regulatory_ncRNA SO:0000370 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012 # Small nucleolar RNAs (snoRNAs) are involved in the processing and # modification of rRNA in the nucleolus. There are two main classes of # snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a # member of the box C/D class. Indeed, the box C/D element is a subset of # the six short sequence elements found in all U3 snoRNAs, namely boxes A, # A', B, C, C', and D. The U3 snoRNA secondary structure is characterised # by a small 5' domain (with boxes A and A'), and a larger 3' domain (with # boxes B, C, C', and D), the two domains being linked by a # single-stranded hinge. Boxes B and C form the B/C motif, which appears # to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. # The latter is functionally similar to the C/D motifs found in other # snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding # domain. The 3' domain has conserved protein-binding sites. Both the box # B/C and box C'/D motifs are sufficient for nuclear retention of U3 # snoRNA. The box C'/D motif is also necessary for nucleolar localization, # stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D # motifs are involved in specific protein interactions and are necessary # for the rRNA processing functions of U3 snoRNA. snoRNA SO:0000275 # http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf # SNPs are single base pair positions in genomic DNA at which different # sequence alternatives (alleles) exist in normal individuals in some # population(s), wherein the least frequent allele has an abundance of 1% # or greater. SNP SO:0000694 # PMID:11733745 # Small non-coding RNA in the nucleoplasm. 
A small nuclear RNA molecule # involved in pre-mRNA splicing and processing snRNA SO:0000274 # SO:ke # An intron which is spliced by the spliceosome. spliceosomal_intron SO:0000662 # http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html # The junction between the 3 prime end of an intron and the following # exon. splice_acceptor_site SO:0000164 # http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html # The junction between the 3 prime end of an exon and the following # intron. splice_donor_site SO:0000163 # SO:ke # Region of a transcript that regulates splicing. splice_enhancer SO:0000344 # SO:ke # The position where intron is excised. splice_site SO:0000162 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017 # The signal recognition particle (SRP) is a universally conserved # ribonucleoprotein. It is involved in the co-translational targeting of # proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide # 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP # consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 # proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh # protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and # archaeal 7S RNAs have very similar secondary structures, with eight # helical elements. These fold into the Alu and S domains, separated by a # long linker region. Eubacterial SRP is generally a simpler structure, # with the M domain of Ffh bound to a region of the 4.5S RNA that # corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some # Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger # SRP RNA that also has an Alu domain. The Alu domain is thought to # mediate the peptide chain elongation retardation function of the SRP. # The universally conserved helix which interacts with the SRP54/Ffh M # domain mediates signal sequence recognition. 
In eukaryotes and archaea, # the SRP19-helix 6 complex is thought to be involved in SRP assembly and # stabilizes helix 8 for SRP54 binding. SRP_RNA SO:0000590 # PMID:11081512 # Non-coding RNAs of about 21 nucleotides in length that regulate temporal # development; first discovered in C. elegans. stRNA SO:0000649 # http://www.biospace.com # Short (typically a few hundred base pairs) DNA sequence that has a # single occurrence in a genome and whose location and base sequence are # known. STS SO:0000331 # http://www.ebi.ac.uk/mutations/recommendations/mutevent.html # Any change in genomic DNA caused by a single event. substitution SO:1000002 # SO:ls # One or more contigs that have been ordered and oriented using end-read # information. Contains gaps that are filled with N's. supercontig SO:0000148 # SO:ke # A nucleotide sequence that may be used to identify a larger sequence. tag SO:0000324 # http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html # Two or more adjacent copies of a DNA sequence. tandem_repeat SO:0000705 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025 # The RNA component of telomerase, a reverse transcriptase that # synthesises telomeric DNA. telomerase_RNA SO:0000390 # SO:ma # A specific structure at the end of a linear chromosome, required for the # integrity and maintenance of the end. telomere SO:0000624 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # The sequence of DNA located either at the end of the transcript that # causes RNA polymerase to terminate transcription. terminator SO:0000141 # SO:ke # A region of a molecule that binds to a transcription factor. TF_binding_site SO:0000235 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # A region at the 3' end of a mature transcript (following the stop codon) # that is not translated into a protein.
three_prime_UTR SO:0000205 # CJM:SO # A set of regions which overlap with minimal polymorphism to form a # linear sequence. tiling_path SO:0000472 # SO:ke # A piece of sequence that makes up a tiling_path.SO:0000472. tiling_path_fragment SO:0000474 # A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this # part of the tmRNA is used as a reading frame that ends in a # translation stop signal. The broken mRNA is replaced in the ribosome # by the tmRNA and translation of the tmRNA leads to addition of a # proteolysis tag to the incomplete protein enabling recognition by a # protease. Recently a number of permuted tmRNA genes have been found # encoded in two parts. TmRNAs have been identified in eubacteria and # some chloroplasts but are absent from archaeal and eukaryote nuclear # genomes. tmRNA SO:0000584 # SO:ma # An RNA synthesized on a DNA or RNA template by an RNA polymerase. transcript SO:0000673 # SO:ke # The site where transcription ends. transcription_end_site SO:0000616 # SO:ke # The site where transcription begins. transcription_start_site SO:0000315 # http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types # The coding sequence for an N-terminal domain of a nuclear-encoded # organellar protein: this domain is involved in post translational import # of the protein into the organelle. transit_peptide SO:0000725 # SO:ke # A match against a translated sequence. translated_nucleotide_match SO:0000181 # http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html # A transposon or insertion sequence. An element that can insert in a # variety of DNA sequences. transposable_element SO:0000101 # SO:ke # The junction in a genome where a transposable_element has inserted. transposable_element_insertion_site SO:0000368 # SO:ke # The process that produces mature transcripts by combining exons of # independent pre-mRNA molecules. The acceptor site lies on the 3' of # these molecules.
trans_splice_acceptor_site SO:0000706 # ISBN:0198506732 # Transfer RNA (tRNA) molecules are approximately 80 nucleotides in # length. Their secondary structure includes four short double-helical # elements and three loops (D, anti-codon, and T loops). Further hydrogen # bonds mediate the characteristic L-shaped molecular structure. tRNAs # have two regions of fundamental functional importance: the anti-codon, # which is responsible for specific mRNA codon recognition, and the 3' # end, to which the tRNA's corresponding amino acid is attached (by # aminoacyl-tRNA synthetases). tRNAs cope with the degeneracy of the # genetic code in two manners: having more than one tRNA (with a specific # anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. # permitting non-standard base-pairing at the 3rd anti-codon position. tRNA SO:0000253 # PMID:9622129 # U11 snRNA plays a role in splicing of the minor U12-dependent class of # eukaryotic nuclear introns; similar to U1 snRNA in the major class # spliceosome, it base pairs to the conserved 5' splice site sequence. U11_snRNA SO:0000398 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007 # The U12 small nuclear RNA (snRNA), together with U4atac/U6atac, U5, and U11 # snRNAs and associated proteins, forms a spliceosome that cleaves a # divergent class of low-abundance pre-mRNA introns. U12_snRNA SO:0000399 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016 # U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of # eukaryotic precursor rRNAs. In yeasts, this molecule possesses a stem-loop # region (known as the Y-domain) which is essential for function. A # similar structure, but with a different consensus sequence, is found in # plants, but is absent in vertebrates. U14_snRNA SO:0000403 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003 # U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved # in pre-mRNA splicing). 
Its 5' end forms complementary base pairs with # the 5' splice junction, thus defining the 5' donor site of an intron. # There are significant differences in sequence and secondary structure # between metazoan and yeast U1 snRNAs, the latter being much longer (568 # nucleotides as compared to 164 nucleotides in human). Nevertheless, # secondary structure predictions suggest that all U1 snRNAs share a # 'common core' consisting of helices I, II, the proximal region of III, # and IV. U1_snRNA SO:0000391 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004 # U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved # in pre-mRNA splicing). Complementary binding between U2 snRNA (in an # area lying towards the 5' end but 3' to hairpin I) and the branchpoint # sequence (BPS) of the intron results in the bulging out of an unpaired # adenine, on the BPS, which initiates a nucleophilic attack at the # intronic 5' splice site, thus starting the first of two # transesterification reactions that mediate splicing. U2_snRNA SO:0000392 # PMID:12409455 # An snRNA required for the splicing of the minor U12-dependent class of # eukaryotic nuclear introns. It forms a base paired complex with # U6atac_snRNA (SO:0000397). U4atac_snRNA SO:0000394 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015 # U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent # spliceosome. It forms a duplex with U6, and with each splicing round, it # is displaced from U6 (and the spliceosome) in an ATP-dependent manner, # allowing U6 to refold and create the active site for splicing catalysis. # A recycling process involving protein Prp24 re-anneals U4 and U6. U4_snRNA SO:0000393 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020 # U5 RNA is a component of both types of known spliceosome. 
The precise # function of this molecule is unknown, though it is known that the 5' # loop is required for splice site selection and p220 binding, and that # both the 3' stem-loop and the Sm site are important for Sm protein # binding and cap methylation. U5_snRNA SO:0000395 # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract # U6atac_snRNA - An snRNA required for the splicing of the minor # U12-dependent class of eukaryotic nuclear introns. It forms a base # paired complex with U4atac_snRNA (SO:0000394). U6atac_snRNA SO:0000397 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015 # U6 snRNA is a component of the spliceosome which is involved in splicing # pre-mRNA. The putative secondary structure consensus base pairing is # confined to a short 5' stem loop, but U6 snRNA is thought to form # extensive base-pair interactions with U4 snRNA. U6_snRNA SO:0000396 # FB:WG # An ordered and oriented set of scaffolds based on somewhat weaker sets # of inferential evidence such as one set of mate pair reads together with # supporting evidence from ESTs or location of markers from SNP or # microsatellite maps, or cytogenetic localization of contained markers. ultracontig SO:0000719 # SO:ke # Messenger RNA sequences that are untranslated and lie five prime and # three prime to sequences which are translated. UTR SO:0000203 # Germline genomic DNA including L-part1, V-intron and V-exon, with the # 5' UTR and 3' UTR. V_gene SO:0000466 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006 # A family of RNAs are found as part of the enigmatic vault # ribonucleoprotein complex. The complex consists of a major vault protein # (MVP), two minor vault proteins (VPARP and TEP1), and several small # untranslated RNA molecules. It has been suggested that the vault complex # is involved in drug resistance. 
vault_RNA SO:0000404 vertebrate_immune_system_gene_recombination_feature SO:0000301 SO:0000301_S_region # Use for EMBL N_region vertebrate_immune_system_gene_recombination_spacer SO:0000563 # Use for EMBL V_region vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster SO:0000938 # SO:ke # A continuous piece of sequence similar to the 'virtual contig' concept of # Ensembl. virtual_sequence SO:0000499 # http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019 # Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in # association with Ro60 and La proteins. The Y RNAs and Ro60 and La # proteins are well conserved, but the function of the Ro RNP is not # known. In humans the RNA component can be one of four small RNAs: hY1, # hY3, hY4 and hY5. These small RNAs are predicted to fold into a # conserved secondary structure containing three stem structures. The # largest of the four, hY1, contains an additional hairpin. Y_RNA SO:0000405 ####include Efeatures.gff