The classes in the com.hfg.bio package facilitate the calculation of various physical properties for protein or nucleotide sequences. The object hierarchy begins with Element, which implements the OrganicMatter interface. Molecule then extends OrganicMatter and contains Elements. AminoAcid and Nucleotide in turn extend Molecule and are themselves contained in Protein and NucleicAcid objects, respectively.
// Residues of the trypsin example, kept in a named local so the setup below reads cleanly.
String trypsinSequence = "VDDDDKIVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSG"
                       + "IQVRLGEDNINVVEGNEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAA"
                       + "SLNSRVASISLPTSCASAGTQCLISGWGNTKSSGTSYPDVLKCLKAPIL"
                       + "SDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGPVVCSGKLQGIVS"
                       + "WGSGCAQKNKPGVYTKVCNYVSWIKQTIASN";

// Configure the protein: identifier, then sequence, then the number of disulfide bonds.
Protein protein = new Protein().setID("Trypsin");
protein.setSequence(trypsinSequence);
protein.setNumDisulfideBonds(6);

// Mass: each value is checked against the expected number within the given tolerance.
Assert.assertEquals(23980.65113,
                    protein.getAverageMass(),
                    0.0001);
Assert.assertEquals(23980.76178,
                    protein.getOrganicAverageMass(),
                    0.0001);
Assert.assertEquals(23965.4927994,
                    protein.getMonoisotopicMass(),
                    0.00001);

// Chemical formula: plain digits and the Unicode-subscript rendering of the same formula.
Assert.assertEquals("C1039H1626N286O338S14",
                    protein.getChemicalFormula());
Assert.assertEquals("C₁₀₃₉H₁₆₂₆N₂₈₆O₃₃₈S₁₄",
                    protein.getChemicalFormulaWithSubscripts());

// Elemental composition: hydrogen count in the native analysis mode...
Assert.assertEquals(1626,
                    protein.getElementalComposition(ProteinAnalysisMode.NATIVE)
                           .get(Element.HYDROGEN)
                           .intValue());

// ...and in a reduced analysis mode, where the expected count is 12 higher
// (presumably two hydrogens per reduced disulfide bond).
ReducedAnalysisMode reducedMode = new ReducedAnalysisMode().setAlkylatedCysteine(AminoAcid.CYSTEINE);
Assert.assertEquals(1638,
                    protein.getElementalComposition(reducedMode)
                           .get(Element.HYDROGEN)
                           .intValue());
// Trypsin: extinction coefficient example.
String trypsinResidues = "VDDDDKIVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSG"
                       + "IQVRLGEDNINVVEGNEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAA"
                       + "SLNSRVASISLPTSCASAGTQCLISGWGNTKSSGTSYPDVLKCLKAPIL"
                       + "SDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGPVVCSGKLQGIVS"
                       + "WGSGCAQKNKPGVYTKVCNYVSWIKQTIASN";

// Set the sequence and the disulfide bond count before querying the coefficients.
Protein protein = new Protein();
protein.setSequence(trypsinResidues);
protein.setNumDisulfideBonds(6);

// The extinction coefficient is compared exactly; the percent extinction coefficient
// is compared within a tolerance of 0.01.
Assert.assertEquals(37700,
                    protein.getExtinctionCoeff());
Assert.assertEquals(1.57,
                    protein.getPercentExtinctionCoeff(),
                    0.01);
// >P02655|APOC2_HUMAN Apolipoprotein C-II precursor (Apo-CII) (ApoC-II). Residues 23-101
// Build the protein whose isoelectric point is evaluated under three different KaSet values.
Protein protein = new Protein();
protein.setSequence("TQQPQQDEMPSPTFLTQVKESLSSYWESAKTAAQNLYE"
                  + "KTYLPAVDEKLRDLYSKSTAAMSTYTGIFTDQVLSVLKGEE");

// Floating-point results are compared with an explicit tolerance rather than by exact
// equality, matching the other floating-point assertions in these examples. The
// tolerance is well below the two-decimal precision of the expected values.
Assert.assertEquals(4.59f, protein.getIsoelectricPoint(KaSet.SILLERO), 0.001f);
Assert.assertEquals(4.66f, protein.getIsoelectricPoint(KaSet.EXPASY), 0.001f);
Assert.assertEquals(4.60f, protein.getIsoelectricPoint(KaSet.BJELLQVIST), 0.001f);
// P01308: insulin assembled from two chains joined by disulfide cross-links.

// Chain A carries one disulfide link between two of its own positions (6 and 11);
// that link is registered on the chain itself.
Protein chainA = new Protein().setSequence("giveqcctsicslyqlenycn");
chainA.addXLink(new ProteinXLink(ProteinXLinkType.DISULFIDE, chainA, 6, chainA, 11));

Protein chainB = new Protein().setSequence("vnqhlcgshlvealylvcgergffytpkt");

// The parent protein holds both chains.
Protein insulin = new Protein().setID("Insulin");
insulin.addChain(chainA);
insulin.addChain(chainB);

// The two links that span chain A and chain B are registered on the parent protein.
ProteinXLink firstInterChainLink  = new ProteinXLink(ProteinXLinkType.DISULFIDE, chainA, 7, chainB, 6);
ProteinXLink secondInterChainLink = new ProteinXLink(ProteinXLinkType.DISULFIDE, chainA, 20, chainB, 18);
insulin.addXLink(firstInterChainLink);
insulin.addXLink(secondInterChainLink);

// No matter which chains we added the xlinks to, getXLinks() should search down
// through enclosed chains and return all xlinks it finds: one on chain A plus
// two on the parent.
Assert.assertEquals(3,
                    insulin.getXLinks().size());
// Proinsulin: translate a nucleotide sequence and check the resulting protein.
String codingSequence = "atggc"
                      + "cctgtggatgcgcctcctgcccctgctggcgctgctggccctctggggacctgacccagc"
                      + "cgcagcctttgtgaaccaacacctgtgcggctcacacctggtggaagctctctacctagt"
                      + "gtgcggggaacgaggcttcttctacacacccaagacccgccgggaggcagaggacctgca"
                      + "ggtggggcaggtggagctgggcgggggccctggtgcaggcagcctgcagcccttggccct"
                      + "ggaggggtccctgcagaagcgtggcattgtggaacaatgctgtaccagcatctgctccct"
                      + "ctaccagctggagaactactgcaac";
String expectedProtein = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN";

NucleicAcid na = new NucleicAcid().setID("AY899304.1")
                                  .setSequence(codingSequence);

// Translate in frame A.
NucleicAcidTranslator frameTranslator = new NucleicAcidTranslator();
Protein translation = frameTranslator.translate(na, TranslationFrame.A);

// The translated sequence must match, and the frame used is recorded on the
// translation under NucleicAcidTranslator.FRAME_ATT.
Assert.assertEquals(expectedProtein,
                    translation.getSequence());
Assert.assertEquals(TranslationFrame.A,
                    translation.getAttribute(NucleicAcidTranslator.FRAME_ATT));