001package com.hfg.bio.seq.format.feature.genbank; 002 003import com.hfg.bio.seq.format.GenBank; 004import com.hfg.bio.seq.format.feature.FeatureKey; 005import com.hfg.util.StringUtil; 006 007import java.util.Collection; 008import java.util.HashMap; 009import java.util.Map; 010 011//------------------------------------------------------------------------------ 012/** 013 DDBJ/EMBL/GenBank feature table keys for flat-file records. 014 <p> 015 See <a href='http://www.insdc.org/documents/feature-table'>http://www.insdc.org/documents/feature-table</a> 016 </p> 017 @author J. Alex Taylor, hairyfatguy.com 018 */ 019//------------------------------------------------------------------------------ 020// com.hfg Library 021// 022// This library is free software; you can redistribute it and/or 023// modify it under the terms of the GNU Lesser General Public 024// License as published by the Free Software Foundation; either 025// version 2.1 of the License, or (at your option) any later version. 026// 027// This library is distributed in the hope that it will be useful, 028// but WITHOUT ANY WARRANTY; without even the implied warranty of 029// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 030// Lesser General Public License for more details. 031// 032// You should have received a copy of the GNU Lesser General Public 033// License along with this library; if not, write to the Free Software 034// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 035// 036// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 037// jataylor@hairyfatguy.com 038//------------------------------------------------------------------------------ 039 040public class GenBankFeatureKey implements FeatureKey 041{ 042 private String mName; 043 private String mDescription; 044 045 private static Map<String, GenBankFeatureKey> sUniqueMap = new HashMap<String, GenBankFeatureKey>(); 046 047 public static final GenBankFeatureKey assembly_gap = new GenBankFeatureKey("assembly_gap") 048 .setDescription("gap between two components of a genome or transcriptome assembly"); 049 050 public static final GenBankFeatureKey attenuator = new GenBankFeatureKey("attenuator") 051 .setDescription("1) region of DNA at which regulation of termination of transcription occurs, which controls the expression of some bacterial operons;\n" + 052 "2) sequence segment located between the promoter and the first structural gene that causes partial termination of transcription"); 053 054 public static final GenBankFeatureKey C_region = new GenBankFeatureKey("C_region") 055 .setDescription("constant region of immunoglobulin light and heavy chains, and T-cell receptor alpha, beta, and gamma chains; includes one or more exons depending on the particular chain"); 056 057 public static final GenBankFeatureKey CAAT_signal = new GenBankFeatureKey("CAAT_signal") 058 .setDescription("CAAT box; part of a conserved sequence located about 75 bp up-stream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C or T)CAATCT [1,2]."); 059 060 public static final GenBankFeatureKey CDS = new GenBankFeatureKey("CDS") 061 .setDescription("coding sequence; sequence of nucleotides that corresponds with the sequence of amino acids in a protein (location includes stop codon); feature includes amino acid conceptual translation."); 062 063 public static final GenBankFeatureKey centromere = new GenBankFeatureKey("centromere") 064 .setDescription("region of biological interest identified as a centromere and which has been experimentally characterized;"); 065 066 public static final GenBankFeatureKey conflict = new GenBankFeatureKey("conflict") 067 .setDescription("Not found in documentation"); 068 069 public static final GenBankFeatureKey D_loop = new GenBankFeatureKey("D-loop") 070 .setDescription("displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein"); 071 072 public static final GenBankFeatureKey D_segment = new GenBankFeatureKey("D_segment") 073 .setDescription("Diversity segment of immunoglobulin heavy chain, and T-cell receptor beta chain;"); 074 075 public static final GenBankFeatureKey enhancer = new GenBankFeatureKey("enhancer") 076 .setDescription("a cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter;"); 077 078 public static final GenBankFeatureKey exon = new GenBankFeatureKey("exon") 079 .setDescription("region of genome that codes for portion of spliced mRNA, rRNA and tRNA; may contain 5'UTR, all CDSs and 3' UTR;"); 080 081 public static final GenBankFeatureKey gap = new GenBankFeatureKey("gap") 082 .setDescription("gap in the sequence"); 083 084 public static final GenBankFeatureKey GC_signal = new GenBankFeatureKey("GC_signal") 085 .setDescription("GC box; a conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG;"); 086 087 public static final GenBankFeatureKey gene = new GenBankFeatureKey("gene") 088 .setDescription("region of biological interest identified as a gene and for which a name has been assigned;"); 089 090 public static final GenBankFeatureKey iDNA = new GenBankFeatureKey("iDNA") 091 .setDescription("intervening DNA; DNA which is eliminated through any of several kinds of recombination;"); 092 093 public static final GenBankFeatureKey intron = new GenBankFeatureKey("intron") 094 .setDescription("a segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it;"); 095 096 public static final GenBankFeatureKey J_segment = new GenBankFeatureKey("J_segment") 097 .setDescription("joining segment of immunoglobulin light and heavy chains, and T-cell receptor alpha, beta, and gamma chains; "); 098 099 public static final GenBankFeatureKey LTR = new GenBankFeatureKey("LTR") 100 .setDescription("long terminal repeat, a sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses;"); 101 102 public static final GenBankFeatureKey mat_peptide = new GenBankFeatureKey("mat_peptide") 103 .setDescription("mature peptide or protein coding sequence; coding sequence for the mature or final peptide or protein product following post-translational modification; the location does not include the stop codon (unlike the corresponding CDS);"); 104 105 public static final GenBankFeatureKey misc_binding = new GenBankFeatureKey("misc_binding") 106 .setDescription("site in nucleic acid which covalently or non-covalently binds another moiety that cannot be described by any other binding key (primer_bind or protein_bind);"); 107 108 public static final GenBankFeatureKey misc_difference = new GenBankFeatureKey("misc_difference") 109 .setDescription("feature sequence is different from that presented in the entry and cannot be described by any other Difference key (unsure, old_sequence, variation, or modified_base);"); 110 111 public static final GenBankFeatureKey misc_feature = new GenBankFeatureKey("misc_feature") 112 .setDescription("region of biological interest which cannot be described by any other feature key; a new or rare feature;"); 113 114 public static final GenBankFeatureKey misc_recomb = new GenBankFeatureKey("misc_recomb") 115 .setDescription("site of any generalized, site-specific or replicative recombination event where there is a breakage and reunion of duplex DNA that cannot be described by other recombination keys or qualifiers of source key (/proviral);"); 116 117 public static final GenBankFeatureKey misc_RNA = new GenBankFeatureKey("misc_RNA") 118 .setDescription("any transcript or RNA product that cannot be defined by other RNA keys (prim_transcript, precursor_RNA, mRNA, 5'UTR, 3'UTR, exon, CDS, sig_peptide, transit_peptide, mat_peptide, intron, polyA_site, ncRNA, rRNA and tRNA);"); 119 120 public static final GenBankFeatureKey misc_signal = new GenBankFeatureKey("misc_signal") 121 .setDescription("any region containing a signal controlling or altering gene function or expression that cannot be described by other signal keys (promoter, CAAT_signal, TATA_signal, -35_signal, -10_signal, GC_signal, RBS, polyA_signal, enhancer, attenuator, terminator, and rep_origin)."); 122 123 public static final GenBankFeatureKey misc_structure = new GenBankFeatureKey("misc_structure") 124 .setDescription("any secondary or tertiary nucleotide structure or conformation that cannot be described by other Structure keys (stem_loop and D-loop);"); 125 126 public static final GenBankFeatureKey mobile_element = new GenBankFeatureKey("mobile_element") 127 .setDescription("region of genome containing mobile elements;"); 128 129 public static final GenBankFeatureKey modified_base = new GenBankFeatureKey("modified_base") 130 .setDescription("the indicated nucleotide is a modified nucleotide and should be substituted for by the indicated molecule (given in the mod_base qualifier value)"); 131 132 public static final GenBankFeatureKey mRNA = new GenBankFeatureKey("mRNA") 133 .setDescription("messenger RNA; includes 5'untranslated region (5'UTR), coding sequences (CDS, exon) and 3'untranslated region (3'UTR);"); 134 135 public static final GenBankFeatureKey ncRNA = new GenBankFeatureKey("ncRNA") 136 .setDescription("a non-protein-coding gene, other than ribosomal RNA and transfer RNA, the functional molecule of which is the RNA transcript;"); 137 138 public static final GenBankFeatureKey N_region = new GenBankFeatureKey("N_region") 139 .setDescription("extra nucleotides inserted between rearranged immunoglobulin segments."); 140 141 public static final GenBankFeatureKey old_sequence = new GenBankFeatureKey("old_sequence") 142 .setDescription("the presented sequence revises a previous version of the sequence at this location;"); 143 144 public static final GenBankFeatureKey operon = new GenBankFeatureKey("operon") 145 .setDescription("region containing polycistronic transcript including a cluster of genes that are under the control of the same regulatory sequences/promotor and in the same biological pathway"); 146 147 public static final GenBankFeatureKey oriT = new GenBankFeatureKey("oriT") 148 .setDescription("origin of transfer; region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization"); 149 150 public static final GenBankFeatureKey polyA_signal = new GenBankFeatureKey("polyA_signal") 151 .setDescription("recognition region necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA;"); 152 153 public static final GenBankFeatureKey polyA_site = new GenBankFeatureKey("polyA_site") 154 .setDescription("site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation;"); 155 156 public static final GenBankFeatureKey precursor_RNA = new GenBankFeatureKey("precursor_RNA") 157 .setDescription("any RNA species that is not yet the mature RNA product; may include 5' untranslated region (5'UTR), coding sequences (CDS, exon), intervening sequences (intron) and 3' untranslated region (3'UTR);"); 158 159 public static final GenBankFeatureKey prim_transcript = new GenBankFeatureKey("prim_transcript") 160 .setDescription("primary (initial, unprocessed) transcript; includes 5' untranslated region (5'UTR), coding sequences (CDS, exon), intervening sequences (intron) and 3' untranslated region (3'UTR);"); 161 162 public static final GenBankFeatureKey primer_bind = new GenBankFeatureKey("primer_bind") 163 .setDescription("non-covalent primer binding site for initiation of replication, transcription, or reverse transcription; includes site(s) for synthetic e.g., PCR primer elements;"); 164 165 public static final GenBankFeatureKey promoter = new GenBankFeatureKey("promoter") 166 .setDescription("region on a DNA molecule involved in RNA polymerase binding to initiate transcription;"); 167 168 public static final GenBankFeatureKey protein_bind = new GenBankFeatureKey("protein_bind") 169 .setDescription("non-covalent protein binding site on nucleic acid;"); 170 171 public static final GenBankFeatureKey RBS = new GenBankFeatureKey("RBS") 172 .setDescription("ribosome binding site;"); 173 174 public static final GenBankFeatureKey regulatory = new GenBankFeatureKey("regulatory") 175 .setDescription("region involved in regulation of expression; Not found in documentation"); 176 177 public static final GenBankFeatureKey repeat_region = new GenBankFeatureKey("repeat_region") 178 .setDescription("region of genome containing repeating units;"); 179 180 public static final GenBankFeatureKey rep_origin = new GenBankFeatureKey("rep_origin") 181 .setDescription("origin of replication; starting site for duplication of nucleic acid to give two identical copies;"); 182 183 public static final GenBankFeatureKey rRNA = new GenBankFeatureKey("rRNA") 184 .setDescription("mature ribosomal RNA; RNA component of the ribonucleoprotein particle (ribosome) which assembles amino acids into proteins."); 185 186 public static final GenBankFeatureKey S_region = new GenBankFeatureKey("S_region") 187 .setDescription("switch region of immunoglobulin heavy chains; involved in the rearrangement of heavy chain DNA leading to the expression of a different immunoglobulin class from the same B-cell;"); 188 189 public static final GenBankFeatureKey sig_peptide = new GenBankFeatureKey("sig_peptide") 190 .setDescription("signal peptide coding sequence; coding sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence;"); 191 192 public static final GenBankFeatureKey source = new GenBankFeatureKey("source") 193 .setDescription("identifies the biological source of the specified span of the sequence; this key is mandatory; more than one source key per sequence is allowed; every entry/record will have, as a minimum, either a single source key spanning the entire sequence or multiple source keys, which together, span the entire sequence."); 194 195 public static final GenBankFeatureKey stem_loop = new GenBankFeatureKey("stem_loop") 196 .setDescription("hairpin; a double-helical region formed by base-pairing between adjacent (inverted) complementary sequences in a single strand of RNA or DNA. "); 197 198 public static final GenBankFeatureKey STS = new GenBankFeatureKey("STS") 199 .setDescription("sequence tagged site; short, single-copy DNA sequence that characterizes a mapping landmark on the genome andcan be detected by PCR; a region of the genome can bemapped by determining the order of a series of STSs;"); 200 201 public static final GenBankFeatureKey TATA_signal = new GenBankFeatureKey("TATA_signal") 202 .setDescription("TATA box; Goldberg-Hogness box; a conserved AT-rich septamer found about 25 bp before the start point of each eukaryotic RNA polymerase II transcript unit which may be involved in positioning the enzyme for correct initiation; consensus=TATA(A or T)A(A or T)"); 203 204 public static final GenBankFeatureKey telomere = new GenBankFeatureKey("telomere") 205 .setDescription("region of biological interest identified as a telomere and which has been experimentally characterized;"); 206 207 public static final GenBankFeatureKey terminator = new GenBankFeatureKey("terminator") 208 .setDescription("sequence of DNA located either at the end of the ranscript that causes RNA polymerase to terminate transcription;"); 209 210 public static final GenBankFeatureKey tmRNA = new GenBankFeatureKey("tmRNA") 211 .setDescription("transfer messenger RNA; tmRNA acts as a tRNA first, and then as an mRNA that encodes a peptide tag; the ribosome translates this mRNA region of tmRNA and attaches the encoded peptide tag to the C-terminus of the unfinished protein; this attached tag targets the protein for destruction or proteolysis;"); 212 213 public static final GenBankFeatureKey transit_peptide = new GenBankFeatureKey("transit_peptide") 214 .setDescription("transit peptide coding sequence; coding sequence for an N-terminal domain of a nuclear-encoded organellar protein; this domain is involved in post-translational import of the protein into the organelle;"); 215 216 public static final GenBankFeatureKey tRNA = new GenBankFeatureKey("tRNA") 217 .setDescription("mature transfer RNA, a small RNA molecule (75-85 bases long) that mediates the translation of a nucleic acid sequence into an amino acid sequence;"); 218 219 public static final GenBankFeatureKey unsure = new GenBankFeatureKey("unsure") 220 .setDescription("author is unsure of exact sequence in this region;"); 221 222 public static final GenBankFeatureKey V_region = new GenBankFeatureKey("V_region") 223 .setDescription("variable region of immunoglobulin light and heavy chains, and T-cell receptor alpha, beta, and gamma chains; codes for the variable amino terminal portion; can be composed of V_segments, D_segments, N_regions, and J_segments;"); 224 225 public static final GenBankFeatureKey V_segment = new GenBankFeatureKey("V_segment") 226 .setDescription("variable segment of immunoglobulin light and heavy chains, and T-cell receptor alpha, beta, and gamma chains; codes for most of the variable region (V_region) and the last few amino acids of the leader peptide;"); 227 228 public static final GenBankFeatureKey variation = new GenBankFeatureKey("variation") 229 .setDescription("a related strain contains stable mutations from the same gene (e.g., RFLPs, polymorphisms, etc.) which differ from the presented sequence at this location (and possibly others);"); 230 231 public static final GenBankFeatureKey ThreePrime_UTR = new GenBankFeatureKey("3'UTR") 232 .setDescription("1) region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein;\n" + 233 "2) region at the 3' end of an RNA virus (following the last stop codon) that is not translated into a protein;"); 234 235 public static final GenBankFeatureKey FivePrime_UTR = new GenBankFeatureKey("5'UTR") 236 .setDescription("1) region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein;\n" + 237 "2) region at the 5' end of an RNA virus genome (preceding the first initiation codon) that is not translated into a protein;"); 238 239 public static final GenBankFeatureKey Minus10_signal = new GenBankFeatureKey("-10_signal") 240 .setDescription("Pribnow box; a conserved region about 10 bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT"); 241 242 public static final GenBankFeatureKey Minus35_signal = new GenBankFeatureKey("-35_signal") 243 .setDescription("a conserved hexamer about 35 bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA;"); 244 245 //########################################################################### 246 // CONSTRUCTORS 247 //########################################################################### 248 249 //--------------------------------------------------------------------------- 250 private GenBankFeatureKey(String inName) 251 { 252 mName = inName; 253 sUniqueMap.put(mName, this); 254 } 255 256 //########################################################################### 257 // PUBLIC METHODS 258 //########################################################################### 259 260 //--------------------------------------------------------------------------- 261 public static GenBankFeatureKey valueOf(String inValue) 262 { 263 GenBankFeatureKey key = sUniqueMap.get(inValue); 264 if (null == key) 265 { 266 key = new GenBankFeatureKey(inValue); 267 GenBank.getLogger().warning(StringUtil.singleQuote(inValue) + " is not a recognized GenBank feature key!"); 268 } 269 270 return key; 271 } 272 273 //--------------------------------------------------------------------------- 274 public static Collection<GenBankFeatureKey> values() 275 { 276 return sUniqueMap.values(); 277 } 278 279 //--------------------------------------------------------------------------- 280 public String name() 281 { 282 return mName; 283 } 284 285 //--------------------------------------------------------------------------- 286 @Override 287 public String toString() 288 { 289 return name(); 290 } 291 292 //--------------------------------------------------------------------------- 293 private GenBankFeatureKey setDescription(String inValue) 294 { 295 mDescription = inValue; 296 return this; 297 } 298 299 //--------------------------------------------------------------------------- 300 public String getDescription() 301 { 302 return mDescription; 303 } 304 305}