001package com.hfg.bio.taxonomy.ncbi; 002 003import java.io.BufferedReader; 004import java.io.File; 005import java.io.IOException; 006import java.io.InputStream; 007import java.io.InputStreamReader; 008import java.net.MalformedURLException; 009import java.net.URI; 010import java.net.URL; 011import java.util.logging.Level; 012import java.util.logging.Logger; 013import java.util.zip.ZipEntry; 014import java.util.zip.ZipInputStream; 015 016import com.hfg.exception.ProgrammingException; 017import com.hfg.util.StringUtil; 018 019//------------------------------------------------------------------------------ 020/** 021 A remote data source for use with NCBITaxon. The default URL is the location of 022 the taxdmp.zip on the NCBI's website. Any specified URL (or File) needs to have 023 the same format as the taxdmp.zip. 024 <div> 025 @author J. Alex Taylor, hairyfatguy.com 026 </div> 027 */ 028//------------------------------------------------------------------------------ 029// com.hfg Library 030// 031// This library is free software; you can redistribute it and/or 032// modify it under the terms of the GNU Lesser General Public 033// License as published by the Free Software Foundation; either 034// version 2.1 of the License, or (at your option) any later version. 035// 036// This library is distributed in the hope that it will be useful, 037// but WITHOUT ANY WARRANTY; without even the implied warranty of 038// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 039// Lesser General Public License for more details. 040// 041// You should have received a copy of the GNU Lesser General Public 042// License along with this library; if not, write to the Free Software 043// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 044// 045// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 046// jataylor@hairyfatguy.com 047//------------------------------------------------------------------------------ 048 049public class NCBIRemoteTaxonomyDataSource extends NCBITaxonomyDataSourceImpl 050{ 051 private URL mURL; 052 053 private static final Logger LOGGER = Logger.getLogger(NCBIRemoteTaxonomyDataSource.class.getPackage().getName()); 054 055 private static URL sNCBI_URL; 056 057 static 058 { 059 try 060 { 061 sNCBI_URL = new URL("https://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip"); 062 } 063 catch (MalformedURLException e) 064 { 065 throw new ProgrammingException(e); 066 } 067 } 068 069 //########################################################################### 070 // CONSTRUCTORS 071 //########################################################################### 072 073 //--------------------------------------------------------------------------- 074 public NCBIRemoteTaxonomyDataSource() 075 { 076 setURL(sNCBI_URL); 077 } 078 079 //--------------------------------------------------------------------------- 080 public NCBIRemoteTaxonomyDataSource(URL inURL) 081 { 082 setURL(inURL); 083 } 084 085 //--------------------------------------------------------------------------- 086 public NCBIRemoteTaxonomyDataSource(File inFile) 087 throws MalformedURLException 088 { 089 URI uri = inFile.toURI(); 090 setURL(uri.toURL()); 091 } 092 093 //########################################################################### 094 // PUBLIC METHODS 095 //########################################################################### 096 097 //--------------------------------------------------------------------------- 098 public static Logger getLogger() 099 { 100 return LOGGER; 101 } 102 103 //-------------------------------------------------------------------------- 104 public URL getURL() 105 { 106 return mURL; 107 } 108 109 //########################################################################### 110 // PROTECTED METHODS 111 //########################################################################### 112 113 //-------------------------------------------------------------------------- 114 protected synchronized void initialize() 115 { 116 if (! mURL.getFile().endsWith(".zip")) 117 { 118 throw new RuntimeException("The taxonomy data needs to be in a zipped archive!"); 119 } 120 121 try 122 { 123 initializeFromStream(mURL.openStream()); 124 } 125 catch (IOException e) 126 { 127 throw new RuntimeException("Problem accessing taxonomy data from " + mURL + " :", e); 128 } 129 } 130 131 //########################################################################### 132 // PRIVATE METHODS 133 //########################################################################### 134 135 //-------------------------------------------------------------------------- 136 private void setURL(URL inValue) 137 { 138 mURL = inValue; 139 } 140 141 //-------------------------------------------------------------------------- 142 private void initializeFromStream(InputStream inStream) 143 throws IOException 144 { 145 ZipInputStream zipInputStream = new ZipInputStream(inStream); 146 ZipEntry zipEntry = zipInputStream.getNextEntry(); 147 if (null == zipEntry) 148 { 149 throw new RuntimeException("Problem reading zipped taxonomy data from " + mURL + " !"); 150 } 151 152 LOGGER.log(Level.FINE, "Initializing from the remote URL " + StringUtil.singleQuote(mURL) + " ..."); 153 154 while (zipEntry != null) 155 { 156 String entryName = zipEntry.getName(); 157 File newFile = new File(entryName); 158 String directory = newFile.getParent(); 159 160 LOGGER.log(Level.FINE, "Zip entry:" + StringUtil.singleQuote(entryName)); 161 162 if (directory == null) 163 { 164 if (newFile.isDirectory()) 165 break; 166 } 167 168 if (entryName.equals("names.dmp")) 169 { 170 innerParseNamesFile(new BufferedReader(new InputStreamReader(zipInputStream))); 171 } 172 else if (entryName.equals("nodes.dmp")) 173 { 174 innerParseNodesFile(new BufferedReader(new InputStreamReader(zipInputStream))); 175 } 176 177 zipInputStream.closeEntry(); 178 zipEntry = zipInputStream.getNextEntry(); 179 } 180 181 zipInputStream.close(); 182 183 } 184 185}