001package com.hfg.bio.seq.alignment.blast; 002 003import java.io.ByteArrayInputStream; 004import java.io.File; 005import java.io.IOException; 006import java.io.InputStream; 007import java.util.List; 008 009import com.hfg.bio.seq.BioSequence; 010import com.hfg.bio.seq.format.FASTA; 011import com.hfg.util.BooleanUtil; 012import com.hfg.util.Executor; 013import com.hfg.util.OS; 014import com.hfg.util.StringBuilderPlus; 015import com.hfg.util.StringUtil; 016import com.hfg.util.collection.CollectionUtil; 017 018//============================================================================== 019/** 020 Wrapper for a BLAST search. 021 <div> 022 Command-line executables are downloadable from <pre>ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/</pre> 023 </div> 024 @author J. Alex Taylor, hairyfatguy.com 025 */ 026//============================================================================== 027// com.hfg XML/HTML Coding Library 028// 029// This library is free software; you can redistribute it and/or 030// modify it under the terms of the GNU Lesser General Public 031// License as published by the Free Software Foundation; either 032// version 2.1 of the License, or (at your option) any later version. 033// 034// This library is distributed in the hope that it will be useful, 035// but WITHOUT ANY WARRANTY; without even the implied warranty of 036// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 037// Lesser General Public License for more details. 038// 039// You should have received a copy of the GNU Lesser General Public 040// License along with this library; if not, write to the Free Software 041// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 042// 043// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 044// jataylor@hairyfatguy.com 045//============================================================================== 046 047public class BLAST 048{ 049 private BLAST_Settings mSettings; 050 051 private static BLAST_Settings sDefaultSettings; 052 053 //########################################################################### 054 // CONSTRUCTORS 055 //########################################################################### 056 057 //--------------------------------------------------------------------------- 058 public BLAST() 059 { 060 this(null); 061 } 062 063 //--------------------------------------------------------------------------- 064 public BLAST(BLAST_Settings inSettings) 065 { 066 mSettings = inSettings != null ? inSettings : getDefaultSettings(); 067 } 068 069 //########################################################################### 070 // PUBLIC METHODS 071 //########################################################################### 072 073 //--------------------------------------------------------------------------- 074 public BLAST_Settings getSettings() 075 { 076 return mSettings; 077 } 078 079 //--------------------------------------------------------------------------- 080 public BLAST_Output run(BioSequence inQuery) 081 throws IOException 082 { 083 preflight(); 084 085 String cmd = generateCmd(); 086 087 Executor executor = new Executor(); 088 executor.setSTDIN(generateSTDIN(inQuery)); 089 executor.setCommand(cmd); 090 091 int exitStatus = executor.exec(); 092 093 BLAST_Output output = new BLAST_Output() 094 .setExitStatus(exitStatus) 095 .setStdErr(executor.getSTDERR()) 096 .setStdOut(executor.getSTDOUT()) 097 .setExecutedCmd(cmd); 098 099 return output; 100 } 101 102 //########################################################################### 103 // PRIVATE METHODS 104 //########################################################################### 105 106 //--------------------------------------------------------------------------- 107 public static void setDefaultSettings(BLAST_Settings inValue) 108 { 109 sDefaultSettings = inValue; 110 } 111 112 //--------------------------------------------------------------------------- 113 public static BLAST_Settings getDefaultSettings() 114 { 115 return (sDefaultSettings != null ? sDefaultSettings : new BLAST_Settings()); 116 } 117 118 //--------------------------------------------------------------------------- 119 private File getExecutable() 120 { 121 return (getSettings().getBLAST_Program() != null ? new File(getSettings().getExecutableDir(), getSettings().getBLAST_Program().name()) : null); 122 } 123 124 //--------------------------------------------------------------------------- 125 private void preflight() 126 throws IOException 127 { 128 File exe = getExecutable(); 129 if (null == exe) 130 { 131 throw new IOException("No BLAST program was specified!"); 132 } 133 else if (! exe.exists()) 134 { 135 File exeDir = exe.getParentFile(); 136 if (exeDir != null 137 && !exeDir.exists()) 138 { 139 throw new IOException("The BLAST executable directory " + StringUtil.singleQuote(exeDir.getPath()) + " doesn't exist!"); 140 } 141 142 throw new IOException("The BLAST executable " + StringUtil.singleQuote(exe.getPath()) + " doesn't exist!"); 143 } 144 145 // Check that the specified databases have BLAST index files 146 List<BLAST_Database> dbs = getSettings().getBLAST_Databases(); 147 if (! CollectionUtil.hasValues(dbs)) 148 { 149 throw new IOException("No BLAST databases specified for searching!"); 150 } 151 else 152 { 153 for (BLAST_Database db : dbs) 154 { 155/* TODO 156 if (! db.exists()) 157 { 158 throw new IOException("The BLAST database " + StringUtil.singleQuote(db.getPath()) + " doesn't exist!"); 159 } 160*/ 161 } 162 } 163 } 164 165 166 //--------------------------------------------------------------------------- 167 private String generateCmd() 168 { 169 StringBuilderPlus cmd = new StringBuilderPlus(getExecutable().getPath()).setDelimiter(" "); 170 171 cmd.delimitedAppend("-db " + generateDatabaseList()); 172 173 if (getSettings().getEValue() != null) 174 { 175 cmd.delimitedAppend("-evalue " + getSettings().getEValue()); 176 } 177 178 if (getSettings().getWordSize() != null) 179 { 180 cmd.delimitedAppend("-word_size " + getSettings().getWordSize()); 181 } 182 183 if (getSettings().getSoftMasking() != null) 184 { 185 cmd.delimitedAppend("-soft_masking " + getSettings().getSoftMasking()); 186 } 187 188 if (getSettings().getOutputFile() != null) 189 { 190 cmd.delimitedAppend("-out " + getSettings().getOutputFile().getPath()); 191 } 192 193 if (getSettings().getNumDescriptions() != null) 194 { 195 cmd.delimitedAppend("-num_descriptions " + getSettings().getNumDescriptions()); 196 } 197 198 if (getSettings().getNumAlignments() != null) 199 { 200 cmd.delimitedAppend("-num_alignments " + getSettings().getNumAlignments()); 201 } 202 203 if (BooleanUtil.valueOf(getSettings().htmlOutput())) 204 { 205 cmd.delimitedAppend("-html"); 206 } 207 208 if (getSettings().getNumThreads() != null) 209 { 210 cmd.delimitedAppend("-num_threads " + getSettings().getNumThreads()); 211 } 212 213 if (getSettings().getCommandLineParams() != null) 214 { 215 cmd.delimitedAppend(getSettings().getCommandLineParams()); 216 } 217 218 return cmd.toString(); 219 } 220 221 //--------------------------------------------------------------------------- 222 /* 223 From http://www.ncbi.nlm.nih.gov/books/NBK279669/ 224 225 To access a BLAST database containing spaces under Microsoft Windows it is necessary to use two sets of double-quotes, 226 escaping the innermost quotes with a backslash. For example, Users\joeuser\My Documents\Downloads would be accessed by: 227 228 blastdbcmd -db "\"Users\joeuser\My Documents\Downloads\mydb\"" -info 229 The first backslash escapes the beginning inner quote, and the backslash following “mydb” escapes the ending inner quote. 230 231 A second database can be added to this command by including it within the outer pair of quotes: 232 233 blastdbcmd -db "\"Users\joeuser\My Documents\Downloads\mydb\" myotherdb" -info 234 If the second database had contained a space, it would have been necessary to surround it by quotes escaped by a backslash. 235 236 Under UNIX systems (including LINUX and Mac OS X) it is preferable to use a single quote (‘) in place of the escaped double quote: 237 238 blastdbcmd -db ‘ "path with spaces/mydb" ’ -info 239 Multiple databases can also be listed within the single quotes, similar to the procedure described for Microsoft Windows. 240 */ 241 private String generateDatabaseList() 242 { 243 StringBuilderPlus dbList = new StringBuilderPlus().setDelimiter(" "); 244 245 boolean needOuterQuote = false; 246 for (BLAST_Database db : getSettings().getBLAST_Databases()) 247 { 248 dbList.delimitedAppend(db.getPath().contains(" ") ? StringUtil.quote(db.getPath()) : db.getPath()); 249 if (db.getPath().contains(" ") 250 && OS.value().equals(OS.Windows)) 251 { 252 needOuterQuote = true; 253 } 254 } 255 256 return (getSettings().getBLAST_Databases().size() > 1 || needOuterQuote ? 257 (OS.value().equals(OS.Windows) ? StringUtil.quote(dbList.toString()) : StringUtil.singleQuote(dbList.toString())) : dbList.toString()); 258 } 259 260 //--------------------------------------------------------------------------- 261 private InputStream generateSTDIN(BioSequence inQuery) 262 { 263 FASTA fasta = new FASTA(); 264 265 return new ByteArrayInputStream(fasta.write(inQuery).getBytes()); 266 } 267}