View Javadoc
1   package org.opentrafficsim.base.compressedfiles;
2   
3   import java.io.BufferedInputStream;
4   import java.io.Closeable;
5   import java.io.File;
6   import java.io.FileInputStream;
7   import java.io.IOException;
8   import java.io.InputStream;
9   import java.util.zip.GZIPInputStream;
10  import java.util.zip.ZipFile;
11  
12  /**
13   * Reader for compressed files.
14   * <p>
15   * Copyright (c) 2013-2022 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
16   * BSD-style license. See <a href="http://opentrafficsim.org/node/13">OpenTrafficSim License</a>.
17   * <p>
18   * @version $Revision$, $LastChangedDate$, by $Author$, initial version Oct 25, 2018 <br>
19   * @author <a href="http://www.tbm.tudelft.nl/averbraeck">Alexander Verbraeck</a>
20   * @author <a href="http://www.tudelft.nl/pknoppers">Peter Knoppers</a>
21   * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
22   */
23  public final class Reader
24  {
25      /**
26       * Class with only static methods should not be instantiated.
27       */
28      private Reader()
29      {
30          // Do not instantiate.
31      }
32  
33      /**
34       * Construct a InputStream for a compressed data file.
35       * @param fileName String; the name of the file
36       * @param compressionType CompressionType; the expected type of the data compression in the file
37       * @return InputStream that can yield the expanded content of the file.
38       * @throws IOException when the file could not be read
39       */
40      public static InputStream createInputStream(final String fileName, final CompressionType compressionType) throws IOException
41      {
42          CompressionType useCompressionType =
43                  CompressionType.AUTODETECT.equals(compressionType) ? autoDetectCompressionType(fileName) : compressionType;
44          switch (useCompressionType)
45          {
46              case AUTODETECT:
47                  throw new IOException("Cannot happen");
48  
49              case GZIP:
50                  return new GZIPInputStream(new FileInputStream(fileName));
51  
52              case NONE:
53                  return new FileInputStream(fileName);
54  
55              case ZIP:
56              {
57                  ZipFile zipFile = new ZipFile(fileName);
58                  return new ZipInputStream(zipFile, zipFile.getInputStream(zipFile.entries().nextElement()));
59              }
60  
61              default:
62                  // Cannot happen
63                  throw new IOException("Don't know how to create input stream for compression type " + compressionType);
64  
65          }
66      }
67  
68      /**
69       * Construct a InputStream for a compressed data file. The type of compression is auto-detected.
70       * @param fileName String; the name of the file
71       * @return InputStream that can yield the expanded content of the file.
72       * @throws IOException when the file can not be opened or read
73       */
74      public static InputStream createInputStream(final String fileName) throws IOException
75      {
76          return createInputStream(fileName, CompressionType.AUTODETECT);
77      }
78  
79      /**
80       * Determine the type of compression used in a file.
81       * <p>
82       * Derived from <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
83       * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
84       * Gzip inflate an inputStream (if it is indeed gzip compressed), otherwise return an InputStream that yields the same data
85       * as the <cite>input</cite> argument.
86       * @param fileName String; the name of the file to check
87       * @return InputStream yielding the inflated data
88       * @throws IOException when errors occur reading the signature bytes
89       */
90      public static CompressionType autoDetectCompressionType(final String fileName) throws IOException
91      {
92          final int signatureSize = 10;
93          BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(new File(fileName)));
94          byte[] signature = new byte[signatureSize];
95          bufferedInputStream.read(signature); // read the signature
96          bufferedInputStream.close();
97          // for (int i = 0; i < signatureSize; i++)
98          // {
99          // System.err.println("byte " + i + " is " + String.format("%02x", signature[i]));
100         // }
101         if (isGZipCompressed(signature))
102         {
103             return CompressionType.GZIP;
104         }
105         else if (isZipCompressed(signature))
106         {
107             return CompressionType.ZIP;
108         }
109         return CompressionType.NONE;
110     }
111 
112     /**
113      * Determine if bytes match the GZip compression signature. Derived from
114      * <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
115      * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
116      * Determines if a byte array is compressed. The java.util.zip GZip implementation does not expose the GZip header so it is
117      * difficult to determine if a string is compressed.
118      * @param bytes byte[]; at least 2 bytes from the start of the stream to determine compression type
119      * @return boolean; true if the data appears to be GZip compressed; false otherwise
120      * @throws java.io.IOException if the byte array couldn't be read
121      */
122     public static boolean isGZipCompressed(final byte[] bytes) throws IOException
123     {
124         return (bytes[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (bytes[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8));
125     }
126 
127     /**
128      * Determine if bytes match a ZIP archive signature. Derived from <a href=
129      * "https://en.wikipedia.org/wiki/List_of_file_signatures">https://en.wikipedia.org/wiki/List_of_file_signatures</a>.
130      * @param bytes byte[]; at least 4 bytes from the start of the stream to determine compression type.
131      * @return boolean; true if bytes indicates the start of a ZIP archive; false otherwise
132      */
133     private static boolean isZipCompressed(final byte[] bytes)
134     {
135         if (bytes[0] != 0x50 || bytes[1] != 0x4b)
136         {
137             return false;
138         }
139         return 0x03 == bytes[2] && 0x04 == bytes[3] || 0x05 == bytes[2] && 0x06 == bytes[3]
140                 || 0x07 == bytes[2] && 0x08 == bytes[3];
141     }
142 
143     /**
144      * Container for a ZipFile that implements Readable and closes the contained ZipFile on close.
145      */
146     static class ZipInputStream extends InputStream implements Closeable
147     {
148         /** The ZipFile that needs to be closed when the input stream is closed. */
149         private final ZipFile zipFile;
150 
151         /** The input stream. */
152         private final InputStream inputStream;
153 
154         /**
155          * Construct a new ZipInputStream.
156          * @param zipFile ZipFile; the opened ZIP file
157          * @param inputStream InputStream; input stream of (the first) entry in the ZIP file
158          */
159         ZipInputStream(final ZipFile zipFile, final InputStream inputStream)
160         {
161             this.inputStream = inputStream;
162             this.zipFile = zipFile;
163         }
164 
165         /**
166          * Close down the reader and release all resources.
167          * @throws IOException when closing the reader fails
168          */
169         @Override
170         public void close() throws IOException
171         {
172             super.close();
173             this.zipFile.close();
174         }
175 
176         /** {@inheritDoc} */
177         @Override
178         public int read() throws IOException
179         {
180             return this.inputStream.read();
181         }
182 
183         /** {@inheritDoc} */
184         @Override
185         public String toString()
186         {
187             return "ZipInputStream [zipFile=" + this.zipFile + ", inputStream=" + this.inputStream + "]";
188         }
189 
190     }
191 
192 }