package com.hfg.bio.seq.alignment.blast;

import java.io.File;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.bio.seq.BioSequenceType;
import com.hfg.util.Executor;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.xml.XMLName;
import com.hfg.xml.XMLTag;


//==============================================================================
/**
 Container for a BLAST database.

 @author J. Alex Taylor, hairyfatguy.com
 */
//==============================================================================
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J.
Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 040// jataylor@hairyfatguy.com 041//============================================================================== 042 043public class BLAST_Database extends File 044{ 045 private BioSequenceType mType; 046 047 protected static XMLName XML_NAME = new XMLName("BLAST_Database"); 048 protected static XMLName XML_NAME_ATT = new XMLName("name"); 049 protected static XMLName XML_SEQ_TYPE_ATT = new XMLName("seqType"); 050 protected static XMLName XML_DIR_ATT = new XMLName("dir"); 051 protected static XMLName XML_DESCRIPTION_ATT = new XMLName("description"); 052 053 private static final Pattern NUM_SEQS_PATTERN = Pattern.compile("\\s+([\\d\\,]+) sequences"); 054 055 private String mDescription; 056 private Date mCacheDate; 057 private Integer mNumSequences; 058 059 //########################################################################### 060 // CONSTRUCTORS 061 //########################################################################### 062 063 //--------------------------------------------------------------------------- 064 public BLAST_Database(String inName) 065 { 066 super(inName); 067 } 068 069 //--------------------------------------------------------------------------- 070 public BLAST_Database(File inParentDir, String inName) 071 { 072 super(inParentDir, inName); 073 } 074 075 //--------------------------------------------------------------------------- 076 public BLAST_Database(XMLTag inXMLTag) 077 { 078 super(inXMLTag.getAttributeValue(XML_DIR_ATT), inXMLTag.getAttributeValue(XML_NAME_ATT)); 079 080 inXMLTag.verifyTagName(XML_NAME); 081 082 if (! 
inXMLTag.hasAttribute(XML_SEQ_TYPE_ATT)) 083 { 084 throw new RuntimeException("No " + StringUtil.singleQuote(XML_SEQ_TYPE_ATT) + " specified for BLAST database " + StringUtil.singleQuote(getName()) + "!"); 085 } 086 087 setSeqType(BioSequenceType.valueOf(inXMLTag.getAttributeValue(XML_SEQ_TYPE_ATT))); 088 089 if (inXMLTag.hasAttribute(XML_DESCRIPTION_ATT)) 090 { 091 setDescription(inXMLTag.getAttributeValue(XML_DESCRIPTION_ATT)); 092 } 093 } 094 095 096 //########################################################################### 097 // PUBLIC METHODS 098 //########################################################################### 099 100 //--------------------------------------------------------------------------- 101 public XMLTag toXMLTag() 102 { 103 XMLTag tag = new XMLTag(XML_NAME); 104 tag.setAttribute(XML_NAME_ATT, getName()); 105 tag.setAttribute(XML_DIR_ATT, getParent()); 106 tag.setAttribute(XML_SEQ_TYPE_ATT, getSeqType()); 107 108 if (StringUtil.isSet(getDescription())) 109 { 110 tag.setAttribute(XML_DESCRIPTION_ATT, getDescription()); 111 } 112 113 return tag; 114 } 115 116 //--------------------------------------------------------------------------- 117 public BLAST_Database setSeqType(BioSequenceType inValue) 118 { 119 mType = inValue; 120 return this; 121 } 122 123 //--------------------------------------------------------------------------- 124 public BioSequenceType getSeqType() 125 { 126 return mType; 127 } 128 129 130 //--------------------------------------------------------------------------- 131 public BLAST_Database setDescription(String inValue) 132 { 133 mDescription = inValue; 134 return this; 135 } 136 137 //--------------------------------------------------------------------------- 138 public String getDescription() 139 { 140 return mDescription; 141 } 142 143 144 //--------------------------------------------------------------------------- 145 public Integer getNumSequences() 146 { 147 if (null == mNumSequences 148 || (mCacheDate != 
null 149 && mCacheDate.getTime() < lastModified())) 150 { 151/* Example: 152 /apps/blast/current/bin/blastdbcmd -db /apps/blast/db/swissprot -info 153 154Database: Non-redundant UniProtKB/SwissProt sequences 155 463,486 sequences; 173,912,377 total residues 156 157Date: May 12, 2016 10:23 AM Longest sequence: 35,213 residues 158 159Volumes: 160 /apps/blast/db/swissprot.00 161*/ 162 File exe = new File(BLAST.getDefaultSettings().getExecutableDir(), "blastdbcmd"); 163 if (! exe.exists()) 164 { 165 throw new RuntimeException("The BLAST executable " + StringUtil.singleQuote(exe) + " does not exist!"); 166 } 167 168 StringBuilderPlus cmd = new StringBuilderPlus(exe.getAbsolutePath() + " -db " + StringUtil.singleQuote(getAbsolutePath()) + " -info"); 169 Executor executor = new Executor(); 170 executor.setCommand(cmd.toString()); 171 172 int exitStatus = executor.exec(); 173 174 Matcher m = NUM_SEQS_PATTERN.matcher(executor.getSTDOUT()); 175 if (m.find()) 176 { 177 mNumSequences = Integer.parseInt(StringUtil.replaceAll(m.group(1), ",", "")); 178 } 179 180 mCacheDate = new Date(); 181 } 182 183 return mNumSequences; 184 } 185 186 187}