001package com.hfg.bio.taxonomy.ncbi;
002
003import java.io.BufferedReader;
004import java.io.File;
005import java.io.IOException;
006import java.io.InputStream;
007import java.io.InputStreamReader;
008import java.net.MalformedURLException;
009import java.net.URI;
010import java.net.URL;
011import java.util.logging.Level;
012import java.util.logging.Logger;
013import java.util.zip.ZipEntry;
014import java.util.zip.ZipInputStream;
015
016import com.hfg.exception.ProgrammingException;
017import com.hfg.util.StringUtil;
018
019//------------------------------------------------------------------------------
020/**
021 A remote data source for use with NCBITaxon. The default URL is the location of
022 the taxdmp.zip on the NCBI's website. Any specified URL (or File) needs to have
023 the same format as the taxdmp.zip.
024 <div>
025  @author J. Alex Taylor, hairyfatguy.com
026 </div>
027 */
028//------------------------------------------------------------------------------
029// com.hfg Library
030//
031// This library is free software; you can redistribute it and/or
032// modify it under the terms of the GNU Lesser General Public
033// License as published by the Free Software Foundation; either
034// version 2.1 of the License, or (at your option) any later version.
035//
036// This library is distributed in the hope that it will be useful,
037// but WITHOUT ANY WARRANTY; without even the implied warranty of
038// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
039// Lesser General Public License for more details.
040//
041// You should have received a copy of the GNU Lesser General Public
042// License along with this library; if not, write to the Free Software
043// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
044//
045// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
046// jataylor@hairyfatguy.com
047//------------------------------------------------------------------------------
048
049public class NCBIRemoteTaxonomyDataSource extends NCBITaxonomyDataSourceImpl
050{
051   private URL  mURL;
052
053   private static final Logger LOGGER = Logger.getLogger(NCBIRemoteTaxonomyDataSource.class.getPackage().getName());
054
055   private static URL sNCBI_URL;
056
057   static
058   {
059      try
060      {
061         sNCBI_URL = new URL("https://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip");
062      }
063      catch (MalformedURLException e)
064      {
065         throw new ProgrammingException(e);
066      }
067   }
068
069   //###########################################################################
070   // CONSTRUCTORS
071   //###########################################################################
072
073   //---------------------------------------------------------------------------
074   public NCBIRemoteTaxonomyDataSource()
075   {
076      setURL(sNCBI_URL);
077   }
078
079   //---------------------------------------------------------------------------
080   public NCBIRemoteTaxonomyDataSource(URL inURL)
081   {
082      setURL(inURL);
083   }
084
085   //---------------------------------------------------------------------------
086   public NCBIRemoteTaxonomyDataSource(File inFile)
087      throws MalformedURLException
088   {
089      URI uri = inFile.toURI();
090      setURL(uri.toURL());
091   }
092
093   //###########################################################################
094   // PUBLIC METHODS
095   //###########################################################################
096
097   //---------------------------------------------------------------------------
098   public static Logger getLogger()
099   {
100      return LOGGER;
101   }
102
103   //--------------------------------------------------------------------------
104   public URL getURL()
105   {
106      return mURL;
107   }
108
109   //###########################################################################
110   // PROTECTED METHODS
111   //###########################################################################
112
113   //--------------------------------------------------------------------------
114   protected synchronized void initialize()
115   {
116      if (! mURL.getFile().endsWith(".zip"))
117      {
118         throw new RuntimeException("The taxonomy data needs to be in a zipped archive!");
119      }
120
121      try
122      {
123         initializeFromStream(mURL.openStream());
124      }
125      catch (IOException e)
126      {
127         throw new RuntimeException("Problem accessing taxonomy data from " + mURL + " :", e);
128      }
129   }
130
131   //###########################################################################
132   // PRIVATE METHODS
133   //###########################################################################
134
135   //--------------------------------------------------------------------------
136   private void setURL(URL inValue)
137   {
138      mURL = inValue;
139   }
140
141   //--------------------------------------------------------------------------
142   private void initializeFromStream(InputStream inStream)
143      throws IOException
144   {
145      ZipInputStream zipInputStream = new ZipInputStream(inStream);
146      ZipEntry zipEntry = zipInputStream.getNextEntry();
147      if (null == zipEntry)
148      {
149         throw new RuntimeException("Problem reading zipped taxonomy data from " + mURL + " !");
150      }
151
152      LOGGER.log(Level.FINE, "Initializing from the remote URL " + StringUtil.singleQuote(mURL) + " ...");
153
154      while (zipEntry != null)
155      {
156         String entryName = zipEntry.getName();
157         File newFile     = new File(entryName);
158         String directory = newFile.getParent();
159
160         LOGGER.log(Level.FINE, "Zip entry:" + StringUtil.singleQuote(entryName));
161
162         if (directory == null)
163         {
164            if (newFile.isDirectory())
165               break;
166         }
167
168         if (entryName.equals("names.dmp"))
169         {
170            innerParseNamesFile(new BufferedReader(new InputStreamReader(zipInputStream)));
171         }
172         else if (entryName.equals("nodes.dmp"))
173         {
174            innerParseNodesFile(new BufferedReader(new InputStreamReader(zipInputStream)));
175         }
176
177         zipInputStream.closeEntry();
178         zipEntry = zipInputStream.getNextEntry();
179      }
180
181      zipInputStream.close();
182
183   }
184
185}