001package com.hfg.bio.seq.alignment.blast;
002
003import java.io.File;
004import java.util.Date;
005import java.util.regex.Matcher;
006import java.util.regex.Pattern;
007
008import com.hfg.bio.seq.BioSequenceType;
009import com.hfg.util.Executor;
010import com.hfg.util.StringBuilderPlus;
011import com.hfg.util.StringUtil;
012import com.hfg.xml.XMLName;
013import com.hfg.xml.XMLTag;
014
015
016//==============================================================================
017/**
018 Container for a BLAST database.
019
020 @author J. Alex Taylor, hairyfatguy.com
021 */
022//==============================================================================
023// com.hfg XML/HTML Coding Library
024//
025// This library is free software; you can redistribute it and/or
026// modify it under the terms of the GNU Lesser General Public
027// License as published by the Free Software Foundation; either
028// version 2.1 of the License, or (at your option) any later version.
029//
030// This library is distributed in the hope that it will be useful,
031// but WITHOUT ANY WARRANTY; without even the implied warranty of
032// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
033// Lesser General Public License for more details.
034//
035// You should have received a copy of the GNU Lesser General Public
036// License along with this library; if not, write to the Free Software
037// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
038//
039// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
040// jataylor@hairyfatguy.com
041//==============================================================================
042
043public class BLAST_Database extends File
044{
045   private BioSequenceType mType;
046
047   protected static XMLName XML_NAME       = new XMLName("BLAST_Database");
048   protected static XMLName XML_NAME_ATT   = new XMLName("name");
049   protected static XMLName XML_SEQ_TYPE_ATT   = new XMLName("seqType");
050   protected static XMLName XML_DIR_ATT    = new XMLName("dir");
051   protected static XMLName XML_DESCRIPTION_ATT = new XMLName("description");
052
053   private static final Pattern NUM_SEQS_PATTERN = Pattern.compile("\\s+([\\d\\,]+) sequences");
054
055   private String  mDescription;
056   private Date    mCacheDate;
057   private Integer mNumSequences;
058
059   //###########################################################################
060   // CONSTRUCTORS
061   //###########################################################################
062
063   //---------------------------------------------------------------------------
064   public BLAST_Database(String inName)
065   {
066      super(inName);
067   }
068
069   //---------------------------------------------------------------------------
070   public BLAST_Database(File inParentDir, String inName)
071   {
072      super(inParentDir, inName);
073   }
074
075   //---------------------------------------------------------------------------
076   public BLAST_Database(XMLTag inXMLTag)
077   {
078      super(inXMLTag.getAttributeValue(XML_DIR_ATT), inXMLTag.getAttributeValue(XML_NAME_ATT));
079
080      inXMLTag.verifyTagName(XML_NAME);
081
082      if (! inXMLTag.hasAttribute(XML_SEQ_TYPE_ATT))
083      {
084         throw new RuntimeException("No " + StringUtil.singleQuote(XML_SEQ_TYPE_ATT) + " specified for BLAST database " + StringUtil.singleQuote(getName()) + "!");
085      }
086
087      setSeqType(BioSequenceType.valueOf(inXMLTag.getAttributeValue(XML_SEQ_TYPE_ATT)));
088
089      if (inXMLTag.hasAttribute(XML_DESCRIPTION_ATT))
090      {
091         setDescription(inXMLTag.getAttributeValue(XML_DESCRIPTION_ATT));
092      }
093   }
094
095
096   //###########################################################################
097   // PUBLIC METHODS
098   //###########################################################################
099
100   //---------------------------------------------------------------------------
101   public XMLTag toXMLTag()
102   {
103      XMLTag tag = new XMLTag(XML_NAME);
104      tag.setAttribute(XML_NAME_ATT, getName());
105      tag.setAttribute(XML_DIR_ATT, getParent());
106      tag.setAttribute(XML_SEQ_TYPE_ATT, getSeqType());
107
108      if (StringUtil.isSet(getDescription()))
109      {
110         tag.setAttribute(XML_DESCRIPTION_ATT, getDescription());
111      }
112
113      return tag;
114   }
115
116   //---------------------------------------------------------------------------
117   public BLAST_Database setSeqType(BioSequenceType inValue)
118   {
119      mType = inValue;
120      return this;
121   }
122
123   //---------------------------------------------------------------------------
124   public BioSequenceType getSeqType()
125   {
126      return mType;
127   }
128
129
130   //---------------------------------------------------------------------------
131   public BLAST_Database setDescription(String inValue)
132   {
133      mDescription = inValue;
134      return this;
135   }
136
137   //---------------------------------------------------------------------------
138   public String getDescription()
139   {
140      return mDescription;
141   }
142
143
144   //---------------------------------------------------------------------------
145   public Integer getNumSequences()
146   {
147      if (null == mNumSequences
148            || (mCacheDate != null
149                && mCacheDate.getTime() < lastModified()))
150      {
151/* Example:
152         /apps/blast/current/bin/blastdbcmd -db /apps/blast/db/swissprot -info
153
154Database: Non-redundant UniProtKB/SwissProt sequences
155        463,486 sequences; 173,912,377 total residues
156
157Date: May 12, 2016  10:23 AM    Longest sequence: 35,213 residues
158
159Volumes:
160        /apps/blast/db/swissprot.00
161*/
162         File exe = new File(BLAST.getDefaultSettings().getExecutableDir(), "blastdbcmd");
163         if (! exe.exists())
164         {
165            throw new RuntimeException("The BLAST executable " + StringUtil.singleQuote(exe) + " does not exist!");
166         }
167
168         StringBuilderPlus cmd = new StringBuilderPlus(exe.getAbsolutePath() + " -db " + StringUtil.singleQuote(getAbsolutePath()) + " -info");
169         Executor executor = new Executor();
170         executor.setCommand(cmd.toString());
171
172         int exitStatus = executor.exec();
173
174         Matcher m = NUM_SEQS_PATTERN.matcher(executor.getSTDOUT());
175         if (m.find())
176         {
177            mNumSequences = Integer.parseInt(StringUtil.replaceAll(m.group(1), ",", ""));
178         }
179
180         mCacheDate = new Date();
181      }
182
183      return mNumSequences;
184   }
185
186
187}