View Javadoc
1   package org.opentrafficsim.base.compressedfiles;
2   
3   import java.io.BufferedInputStream;
4   import java.io.Closeable;
5   import java.io.File;
6   import java.io.FileInputStream;
7   import java.io.IOException;
8   import java.io.InputStream;
9   import java.util.zip.GZIPInputStream;
10  import java.util.zip.ZipFile;
11  
12  import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
13  
14  /**
15   * Reader for compressed files.
16   * <p>
17   * Copyright (c) 2013-2019 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
18   * BSD-style license. See <a href="http://opentrafficsim.org/node/13">OpenTrafficSim License</a>.
19   * <p>
20   * @version $Revision$, $LastChangedDate$, by $Author$, initial version Oct 25, 2018 <br>
21   * @author <a href="http://www.tbm.tudelft.nl/averbraeck">Alexander Verbraeck</a>
22   * @author <a href="http://www.tudelft.nl/pknoppers">Peter Knoppers</a>
23   * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
24   */
25  public final class Reader
26  {
27      /**
28       * Class with only static methods should not be instantiated.
29       */
30      private Reader()
31      {
32          // Do not instantiate.
33      }
34  
35      /**
36       * Construct a InputStream for a compressed data file.
37       * @param fileName String; the name of the file
38       * @param compressionType CompressionType; the expected type of the data compression in the file
39       * @return InputStream that can yield the expanded content of the file.
40       * @throws IOException when the file could not be read
41       */
42      public static InputStream createInputStream(final String fileName, final CompressionType compressionType) throws IOException
43      {
44          CompressionType useCompressionType =
45                  CompressionType.AUTODETECT.equals(compressionType) ? autoDetectCompressionType(fileName) : compressionType;
46          switch (useCompressionType)
47          {
48              case AUTODETECT:
49                  throw new IOException("Cannot happen");
50  
51              case BZIP2:
52                  // BUG create with "true" as second argument: see https://issues.apache.org/jira/browse/COMPRESS-224
53                  return new BZip2CompressorInputStream(new FileInputStream(fileName), true);
54  
55              case GZIP:
56                  return new GZIPInputStream(new FileInputStream(fileName));
57  
58              case NONE:
59                  return new FileInputStream(fileName);
60  
61              case ZIP:
62              {
63                  ZipFile zipFile = new ZipFile(fileName);
64                  return new ZipInputStream(zipFile, zipFile.getInputStream(zipFile.entries().nextElement()));
65              }
66  
67              default:
68                  // Cannot happen
69                  throw new IOException("Don't know how to create input stream for compression type " + compressionType);
70  
71          }
72      }
73  
74      /**
75       * Construct a InputStream for a compressed data file. The type of compression is auto-detected.
76       * @param fileName String; the name of the file
77       * @return InputStream that can yield the expanded content of the file.
78       * @throws IOException when the file can not be opened or read
79       */
80      public static InputStream createInputStream(final String fileName) throws IOException
81      {
82          return createInputStream(fileName, CompressionType.AUTODETECT);
83      }
84  
85      /**
86       * Determine the type of compression used in a file.
87       * <p>
88       * Derived from <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
89       * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
90       * Gzip inflate an inputStream (if it is indeed gzip compressed), otherwise return an InputStream that yields the same data
91       * as the <cite>input</cite> argument.
92       * @param fileName String; the name of the file to check
93       * @return InputStream yielding the inflated data
94       * @throws IOException when errors occur reading the signature bytes
95       */
96      public static CompressionType autoDetectCompressionType(final String fileName) throws IOException
97      {
98          final int signatureSize = 10;
99          BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(new File(fileName)));
100         byte[] signature = new byte[signatureSize];
101         bufferedInputStream.read(signature); // read the signature
102         bufferedInputStream.close();
103         // for (int i = 0; i < signatureSize; i++)
104         // {
105         // System.err.println("byte " + i + " is " + String.format("%02x", signature[i]));
106         // }
107         if (isGZipCompressed(signature))
108         {
109             return CompressionType.GZIP;
110         }
111         else if (isBZipCompressed(signature))
112         {
113             return CompressionType.BZIP2;
114         }
115         else if (isZipCompressed(signature))
116         {
117             return CompressionType.ZIP;
118         }
119         return CompressionType.NONE;
120     }
121 
122     /**
123      * Determine if bytes match the GZip compression signature. Derived from
124      * <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
125      * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
126      * Determines if a byte array is compressed. The java.util.zip GZip implementation does not expose the GZip header so it is
127      * difficult to determine if a string is compressed.
128      * @param bytes byte[]; at least 2 bytes from the start of the stream to determine compression type
129      * @return boolean; true if the data appears to be GZip compressed; false otherwise
130      * @throws java.io.IOException if the byte array couldn't be read
131      */
132     public static boolean isGZipCompressed(final byte[] bytes) throws IOException
133     {
134         return (bytes[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (bytes[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8));
135     }
136 
137     /**
138      * Determine if bytes match the BZip2 compression signature.
139      * @param bytes byte[]; at least 10 bytes from the start of the stream to determine compression type.
140      * @return boolean; true if bytes indicates the start of a BZip compressed stream
141      */
142     private static boolean isBZipCompressed(final byte[] bytes)
143     {
144         return bytes[0] == 'B' && bytes[1] == 'Z' && (bytes[2] == 'h' || bytes[2] == '0') && Character.isDigit(bytes[3])
145                 && bytes[4] == 0x31 && bytes[5] == 0x41 && bytes[6] == 0x59 && bytes[7] == 0x26 && bytes[8] == 0x53
146                 && bytes[9] == 0x59;
147     }
148 
149     /**
150      * Determine if bytes match a ZIP archive signature. Derived from <a href=
151      * "https://en.wikipedia.org/wiki/List_of_file_signatures">https://en.wikipedia.org/wiki/List_of_file_signatures</a>.
152      * @param bytes byte[]; at least 4 bytes from the start of the stream to determine compression type.
153      * @return boolean; true if bytes indicates the start of a ZIP archive; false otherwise
154      */
155     private static boolean isZipCompressed(final byte[] bytes)
156     {
157         if (bytes[0] != 0x50 || bytes[1] != 0x4b)
158         {
159             return false;
160         }
161         return 0x03 == bytes[2] && 0x04 == bytes[3] || 0x05 == bytes[2] && 0x06 == bytes[3]
162                 || 0x07 == bytes[2] && 0x08 == bytes[3];
163     }
164 
165     /**
166      * Container for a ZipFile that implements Readable and closes the contained ZipFile on close.
167      */
168     static class ZipInputStream extends InputStream implements Closeable
169     {
170         /** The ZipFile that needs to be closed when the input stream is closed. */
171         private final ZipFile zipFile;
172 
173         /** The input stream. */
174         private final InputStream inputStream;
175 
176         /**
177          * Construct a new ZipInputStream.
178          * @param zipFile ZipFile; the opened ZIP file
179          * @param inputStream InputStream; input stream of (the first) entry in the ZIP file
180          */
181         ZipInputStream(final ZipFile zipFile, final InputStream inputStream)
182         {
183             this.inputStream = inputStream;
184             this.zipFile = zipFile;
185         }
186 
187         /**
188          * Close down the reader and release all resources.
189          * @throws IOException when closing the reader fails
190          */
191         @Override
192         public void close() throws IOException
193         {
194             super.close();
195             this.zipFile.close();
196         }
197 
198         /** {@inheritDoc} */
199         @Override
200         public int read() throws IOException
201         {
202             return this.inputStream.read();
203         }
204 
205         /** {@inheritDoc} */
206         @Override
207         public String toString()
208         {
209             return "ZipInputStream [zipFile=" + this.zipFile + ", inputStream=" + this.inputStream + "]";
210         }
211 
212     }
213 
214 }