001package com.hfg.bio.seq.alignment.blast;
002
003import java.io.ByteArrayInputStream;
004import java.io.File;
005import java.io.IOException;
006import java.io.InputStream;
007import java.util.List;
008
009import com.hfg.bio.seq.BioSequence;
010import com.hfg.bio.seq.format.FASTA;
011import com.hfg.util.BooleanUtil;
012import com.hfg.util.Executor;
013import com.hfg.util.OS;
014import com.hfg.util.StringBuilderPlus;
015import com.hfg.util.StringUtil;
016import com.hfg.util.collection.CollectionUtil;
017
018//==============================================================================
019/**
020 Wrapper for a BLAST search.
021 <div>
022 Command-line executables are downloadable from <pre>ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/</pre>
023 </div>
024 @author J. Alex Taylor, hairyfatguy.com
025 */
026//==============================================================================
027// com.hfg XML/HTML Coding Library
028//
029// This library is free software; you can redistribute it and/or
030// modify it under the terms of the GNU Lesser General Public
031// License as published by the Free Software Foundation; either
032// version 2.1 of the License, or (at your option) any later version.
033//
034// This library is distributed in the hope that it will be useful,
035// but WITHOUT ANY WARRANTY; without even the implied warranty of
036// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
037// Lesser General Public License for more details.
038//
039// You should have received a copy of the GNU Lesser General Public
040// License along with this library; if not, write to the Free Software
041// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
042//
043// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
044// jataylor@hairyfatguy.com
045//==============================================================================
046
047public class BLAST
048{
049   private BLAST_Settings mSettings;
050
051   private static BLAST_Settings sDefaultSettings;
052
053   //###########################################################################
054   // CONSTRUCTORS
055   //###########################################################################
056
057   //---------------------------------------------------------------------------
058   public BLAST()
059   {
060      this(null);
061   }
062
063   //---------------------------------------------------------------------------
064   public BLAST(BLAST_Settings inSettings)
065   {
066      mSettings = inSettings != null ? inSettings : getDefaultSettings();
067   }
068
069   //###########################################################################
070   // PUBLIC METHODS
071   //###########################################################################
072
073   //---------------------------------------------------------------------------
074   public BLAST_Settings getSettings()
075   {
076      return mSettings;
077   }
078
079   //---------------------------------------------------------------------------
080   public BLAST_Output run(BioSequence inQuery)
081      throws IOException
082   {
083      preflight();
084
085      String cmd = generateCmd();
086
087      Executor executor = new Executor();
088      executor.setSTDIN(generateSTDIN(inQuery));
089      executor.setCommand(cmd);
090
091      int exitStatus = executor.exec();
092
093      BLAST_Output output = new BLAST_Output()
094            .setExitStatus(exitStatus)
095            .setStdErr(executor.getSTDERR())
096            .setStdOut(executor.getSTDOUT())
097            .setExecutedCmd(cmd);
098
099      return output;
100   }
101
102   //###########################################################################
103   // PRIVATE METHODS
104   //###########################################################################
105
106   //---------------------------------------------------------------------------
107   public static void setDefaultSettings(BLAST_Settings inValue)
108   {
109      sDefaultSettings = inValue;
110   }
111
112   //---------------------------------------------------------------------------
113   public static BLAST_Settings getDefaultSettings()
114   {
115      return (sDefaultSettings != null ? sDefaultSettings : new BLAST_Settings());
116   }
117
118   //---------------------------------------------------------------------------
119   private File getExecutable()
120   {
121      return (getSettings().getBLAST_Program() != null ? new File(getSettings().getExecutableDir(), getSettings().getBLAST_Program().name()) : null);
122   }
123
124   //---------------------------------------------------------------------------
125   private void preflight()
126      throws IOException
127   {
128      File exe = getExecutable();
129      if (null == exe)
130      {
131         throw new IOException("No BLAST program was specified!");
132      }
133      else if (! exe.exists())
134      {
135         File exeDir = exe.getParentFile();
136         if (exeDir != null
137               && !exeDir.exists())
138         {
139            throw new IOException("The BLAST executable directory " + StringUtil.singleQuote(exeDir.getPath()) + " doesn't exist!");
140         }
141
142         throw new IOException("The BLAST executable " + StringUtil.singleQuote(exe.getPath()) + " doesn't exist!");
143      }
144
145      // Check that the specified databases have BLAST index files
146      List<BLAST_Database> dbs = getSettings().getBLAST_Databases();
147      if (! CollectionUtil.hasValues(dbs))
148      {
149         throw new IOException("No BLAST databases specified for searching!");
150      }
151      else
152      {
153         for (BLAST_Database db : dbs)
154         {
155/* TODO
156            if (! db.exists())
157            {
158               throw new IOException("The BLAST database " + StringUtil.singleQuote(db.getPath()) + " doesn't exist!");
159            }
160*/
161         }
162      }
163   }
164
165
166   //---------------------------------------------------------------------------
167   private String generateCmd()
168   {
169      StringBuilderPlus cmd = new StringBuilderPlus(getExecutable().getPath()).setDelimiter(" ");
170
171      cmd.delimitedAppend("-db " + generateDatabaseList());
172
173      if (getSettings().getEValue() != null)
174      {
175         cmd.delimitedAppend("-evalue " + getSettings().getEValue());
176      }
177
178      if (getSettings().getWordSize() != null)
179      {
180         cmd.delimitedAppend("-word_size " + getSettings().getWordSize());
181      }
182
183      if (getSettings().getSoftMasking() != null)
184      {
185         cmd.delimitedAppend("-soft_masking " + getSettings().getSoftMasking());
186      }
187
188      if (getSettings().getOutputFile() != null)
189      {
190         cmd.delimitedAppend("-out " + getSettings().getOutputFile().getPath());
191      }
192
193      if (getSettings().getNumDescriptions() != null)
194      {
195         cmd.delimitedAppend("-num_descriptions " + getSettings().getNumDescriptions());
196      }
197
198      if (getSettings().getNumAlignments() != null)
199      {
200         cmd.delimitedAppend("-num_alignments " + getSettings().getNumAlignments());
201      }
202
203      if (BooleanUtil.valueOf(getSettings().htmlOutput()))
204      {
205         cmd.delimitedAppend("-html");
206      }
207
208      if (getSettings().getNumThreads() != null)
209      {
210         cmd.delimitedAppend("-num_threads " + getSettings().getNumThreads());
211      }
212
213      if (getSettings().getCommandLineParams() != null)
214      {
215         cmd.delimitedAppend(getSettings().getCommandLineParams());
216      }
217
218      return cmd.toString();
219   }
220
221   //---------------------------------------------------------------------------
222   /*
223   From http://www.ncbi.nlm.nih.gov/books/NBK279669/
224
225   To access a BLAST database containing spaces under Microsoft Windows it is necessary to use two sets of double-quotes,
226   escaping the innermost quotes with a backslash. For example, Users\joeuser\My Documents\Downloads would be accessed by:
227
228   blastdbcmd -db "\"Users\joeuser\My Documents\Downloads\mydb\"" -info
229   The first backslash escapes the beginning inner quote, and the backslash following “mydb” escapes the ending inner quote.
230
231   A second database can be added to this command by including it within the outer pair of quotes:
232
233   blastdbcmd -db "\"Users\joeuser\My Documents\Downloads\mydb\" myotherdb" -info
234   If the second database had contained a space, it would have been necessary to surround it by quotes escaped by a backslash.
235
236   Under UNIX systems (including LINUX and Mac OS X) it is preferable to use a single quote (‘) in place of the escaped double quote:
237
238   blastdbcmd -db ‘ "path with spaces/mydb" ’ -info
239   Multiple databases can also be listed within the single quotes, similar to the procedure described for Microsoft Windows.
240    */
241   private String generateDatabaseList()
242   {
243      StringBuilderPlus dbList = new StringBuilderPlus().setDelimiter(" ");
244
245      boolean needOuterQuote = false;
246      for (BLAST_Database db : getSettings().getBLAST_Databases())
247      {
248         dbList.delimitedAppend(db.getPath().contains(" ") ? StringUtil.quote(db.getPath()) : db.getPath());
249         if (db.getPath().contains(" ")
250             && OS.value().equals(OS.Windows))
251         {
252            needOuterQuote = true;
253         }
254      }
255
256      return (getSettings().getBLAST_Databases().size() > 1 || needOuterQuote ?
257                    (OS.value().equals(OS.Windows) ? StringUtil.quote(dbList.toString()) : StringUtil.singleQuote(dbList.toString())) : dbList.toString());
258   }
259
260   //---------------------------------------------------------------------------
261   private InputStream generateSTDIN(BioSequence inQuery)
262   {
263      FASTA fasta = new FASTA();
264
265      return new ByteArrayInputStream(fasta.write(inQuery).getBytes());
266   }
267}