The com.hfg.bio.seq.format package contains classes for reading or writing protein or nucleotide sequence objects in various formats. Readable formats implement ReadableSeqFormat and writable formats implement WritableSeqFormat. [Note that this package is still a work in progress, so not all formats are both readable and writable. Available formats are FASTA, FASTQ, GenBank, and EMBL/UniProt. EMBL/UniProt currently supports reading only.]
- File seqFile = new File("test_seqs.fasta");
- BufferedReader reader = new BufferedReader(new FileReader(seqFile));
- FASTA<Protein> fastaObj = new FASTA<>(new ProteinFactory());
- BufferedSeqReader<Protein> seqReader = new BufferedSeqReader<Protein>(reader, fastaObj);
- List<Protein> seqs = seqReader.readAll();
- seqReader.close();
- FASTA<Protein> fastaObj = new FASTA<>();
- FileWriter writer = new FileWriter("write_seqs_test.fasta");
- for (Protein protein : seqs)
- {
- fastaObj.write(protein, writer);
- }
- writer.close();
- File seqFile = new File("test_seq.gb");
- BufferedReader reader = new BufferedReader(new FileReader(seqFile));
- GenBank<NucleicAcid> formatObj = new GenBank<>(new NucleicAcidFactory());
- BufferedSeqReader<NucleicAcid> seqReader = new BufferedSeqReader<>(reader, formatObj);
- List<NucleicAcid> seqs = new ArrayList<>();
- int seqCount = 0;
- while (seqReader.hasNext())
- {
- seqs.add(seqReader.next());
- seqCount++;
- }
- seqReader.close();
- NucleicAcid seq = seqs.get(0);
- Assert.assertEquals("R88064.1", seq.getID());
- Assert.assertEquals(MolType.mRNA, seq.getMolType());
- Assert.assertEquals(SeqTopology.LINEAR, seq.getSeqTopology());
- Assert.assertEquals(NCBIGenBankDivision.EST, seq.getSeqRepositoryDivision());
- Assert.assertEquals("ym87c11.r1 Soares adult brain N2b4HB55Y Homo sapiens cDNA clone IMAGE:165908 5', mRNA sequence", seq.getDescription());
- Assert.assertEquals(460, seq.length());
- Assert.assertEquals("Homo sapiens", seq.getNCBITaxon().getScientificName());
- Assert.assertEquals(1, seq.getReferences().size());
- SeqCitation reference = seq.getReferences().get(0);
- Assert.assertEquals("The WashU-Merck EST Project", reference.getTitle());
- Assert.assertEquals("Unpublished", reference.toString());
- Assert.assertEquals(21, reference.getAuthors().size());
- Assert.assertEquals("Hillier,L., Clark,N., Dubuque,T., Elliston,K., Hawkins,M., Holman,M., Hultman,M., Kucaba,T., Le,M., Lennon,G., Marra,M., Parsons,J., Rifkin,L., Rohlfing,T., Soares,M., Tan,F., Trevaskis,E., Waterston,R., Williamson,A., Wohldmann,P., Wilson,R.", StringUtil.join(reference.getAuthors(), ", "));
- List<SeqFeature> cdsFeatures = seq.getFeatures(GenBankFeatureKey.CDS);