The com.hfg.bio.seq.format package contains classes for reading or writing protein or nucleotide sequence objects in various formats. Readable formats implement ReadableSeqFormat and writable formats implement WritableSeqFormat. [Note that this package is still a work in progress, so not all formats are both readable and writable. Available formats are FASTA, FASTQ, GenBank, and EMBL/UniProt. EMBL/UniProt currently only supports reading.]
// Example: read every protein sequence from a FASTA file into a list.
File seqFile = new File("test_seqs.fasta");
BufferedReader reader = new BufferedReader(new FileReader(seqFile));
FASTA<Protein> fastaObj = new FASTA<>(new ProteinFactory());
BufferedSeqReader<Protein> seqReader = new BufferedSeqReader<>(reader, fastaObj);
List<Protein> seqs;
try {
    seqs = seqReader.readAll();
} finally {
    // Close the sequence reader even when readAll() fails part-way through.
    seqReader.close();
}
// Example: write a list of protein sequences (seqs) to a FASTA file.
FASTA<Protein> fastaObj = new FASTA<>();
// try-with-resources closes the writer even if one of the writes throws.
try (FileWriter writer = new FileWriter("write_seqs_test.fasta")) {
    for (Protein protein : seqs) {
        fastaObj.write(protein, writer);
    }
}
// Example: read GenBank records one at a time, then check the parsed fields of the first record.
File seqFile = new File("test_seq.gb");
BufferedReader reader = new BufferedReader(new FileReader(seqFile));
GenBank<NucleicAcid> formatObj = new GenBank<>(new NucleicAcidFactory());
BufferedSeqReader<NucleicAcid> seqReader = new BufferedSeqReader<>(reader, formatObj);

List<NucleicAcid> seqs = new ArrayList<>();
int seqCount = 0;
try {
    // Pull records individually instead of calling readAll().
    while (seqReader.hasNext()) {
        seqs.add(seqReader.next());
        seqCount++;
    }
} finally {
    // Close the sequence reader even when hasNext()/next() fails part-way through.
    seqReader.close();
}

// Header-level fields of the first record.
NucleicAcid seq = seqs.get(0);
Assert.assertEquals("R88064.1", seq.getID());
Assert.assertEquals(MolType.mRNA, seq.getMolType());
Assert.assertEquals(SeqTopology.LINEAR, seq.getSeqTopology());
Assert.assertEquals(NCBIGenBankDivision.EST, seq.getSeqRepositoryDivision());
Assert.assertEquals("ym87c11.r1 Soares adult brain N2b4HB55Y Homo sapiens cDNA clone IMAGE:165908 5', mRNA sequence", seq.getDescription());
Assert.assertEquals(460, seq.length());
Assert.assertEquals("Homo sapiens", seq.getNCBITaxon().getScientificName());

// The single literature reference attached to the record.
Assert.assertEquals(1, seq.getReferences().size());
SeqCitation reference = seq.getReferences().get(0);
Assert.assertEquals("The WashU-Merck EST Project", reference.getTitle());
Assert.assertEquals("Unpublished", reference.toString());
Assert.assertEquals(21, reference.getAuthors().size());
Assert.assertEquals("Hillier,L., Clark,N., Dubuque,T., Elliston,K., Hawkins,M., Holman,M., Hultman,M., Kucaba,T., Le,M., Lennon,G., Marra,M., Parsons,J., Rifkin,L., Rohlfing,T., Soares,M., Tan,F., Trevaskis,E., Waterston,R., Williamson,A., Wohldmann,P., Wilson,R.", StringUtil.join(reference.getAuthors(), ", "));

// Features are looked up by feature key; here, the CDS features.
List<SeqFeature> cdsFeatures = seq.getFeatures(GenBankFeatureKey.CDS);