1 package org.opentrafficsim.base.compressedfiles;
2
3 import java.io.BufferedInputStream;
4 import java.io.Closeable;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.util.zip.GZIPInputStream;
10 import java.util.zip.ZipFile;
11
12 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
13
14 /**
15 * Reader for compressed files.
16 * <p>
17 * Copyright (c) 2013-2020 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
18 * BSD-style license. See <a href="http://opentrafficsim.org/node/13">OpenTrafficSim License</a>.
19 * <p>
20 * @version $Revision$, $LastChangedDate$, by $Author$, initial version Oct 25, 2018 <br>
21 * @author <a href="http://www.tbm.tudelft.nl/averbraeck">Alexander Verbraeck</a>
22 * @author <a href="http://www.tudelft.nl/pknoppers">Peter Knoppers</a>
23 * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
24 */
25 public final class Reader
26 {
27 /**
28 * Class with only static methods should not be instantiated.
29 */
30 private Reader()
31 {
32 // Do not instantiate.
33 }
34
35 /**
36 * Construct a InputStream for a compressed data file.
37 * @param fileName String; the name of the file
38 * @param compressionType CompressionType; the expected type of the data compression in the file
39 * @return InputStream that can yield the expanded content of the file.
40 * @throws IOException when the file could not be read
41 */
42 public static InputStream createInputStream(final String fileName, final CompressionType compressionType) throws IOException
43 {
44 CompressionType useCompressionType =
45 CompressionType.AUTODETECT.equals(compressionType) ? autoDetectCompressionType(fileName) : compressionType;
46 switch (useCompressionType)
47 {
48 case AUTODETECT:
49 throw new IOException("Cannot happen");
50
51 case BZIP2:
52 // BUG create with "true" as second argument: see https://issues.apache.org/jira/browse/COMPRESS-224
53 return new BZip2CompressorInputStream(new FileInputStream(fileName), true);
54
55 case GZIP:
56 return new GZIPInputStream(new FileInputStream(fileName));
57
58 case NONE:
59 return new FileInputStream(fileName);
60
61 case ZIP:
62 {
63 ZipFile zipFile = new ZipFile(fileName);
64 return new ZipInputStream(zipFile, zipFile.getInputStream(zipFile.entries().nextElement()));
65 }
66
67 default:
68 // Cannot happen
69 throw new IOException("Don't know how to create input stream for compression type " + compressionType);
70
71 }
72 }
73
74 /**
75 * Construct a InputStream for a compressed data file. The type of compression is auto-detected.
76 * @param fileName String; the name of the file
77 * @return InputStream that can yield the expanded content of the file.
78 * @throws IOException when the file can not be opened or read
79 */
80 public static InputStream createInputStream(final String fileName) throws IOException
81 {
82 return createInputStream(fileName, CompressionType.AUTODETECT);
83 }
84
85 /**
86 * Determine the type of compression used in a file.
87 * <p>
88 * Derived from <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
89 * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
90 * Gzip inflate an inputStream (if it is indeed gzip compressed), otherwise return an InputStream that yields the same data
91 * as the <cite>input</cite> argument.
92 * @param fileName String; the name of the file to check
93 * @return InputStream yielding the inflated data
94 * @throws IOException when errors occur reading the signature bytes
95 */
96 public static CompressionType autoDetectCompressionType(final String fileName) throws IOException
97 {
98 final int signatureSize = 10;
99 BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(new File(fileName)));
100 byte[] signature = new byte[signatureSize];
101 bufferedInputStream.read(signature); // read the signature
102 bufferedInputStream.close();
103 // for (int i = 0; i < signatureSize; i++)
104 // {
105 // System.err.println("byte " + i + " is " + String.format("%02x", signature[i]));
106 // }
107 if (isGZipCompressed(signature))
108 {
109 return CompressionType.GZIP;
110 }
111 else if (isBZipCompressed(signature))
112 {
113 return CompressionType.BZIP2;
114 }
115 else if (isZipCompressed(signature))
116 {
117 return CompressionType.ZIP;
118 }
119 return CompressionType.NONE;
120 }
121
122 /**
123 * Determine if bytes match the GZip compression signature. Derived from
124 * <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
125 * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
126 * Determines if a byte array is compressed. The java.util.zip GZip implementation does not expose the GZip header so it is
127 * difficult to determine if a string is compressed.
128 * @param bytes byte[]; at least 2 bytes from the start of the stream to determine compression type
129 * @return boolean; true if the data appears to be GZip compressed; false otherwise
130 * @throws java.io.IOException if the byte array couldn't be read
131 */
132 public static boolean isGZipCompressed(final byte[] bytes) throws IOException
133 {
134 return (bytes[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (bytes[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8));
135 }
136
137 /**
138 * Determine if bytes match the BZip2 compression signature.
139 * @param bytes byte[]; at least 10 bytes from the start of the stream to determine compression type.
140 * @return boolean; true if bytes indicates the start of a BZip compressed stream
141 */
142 private static boolean isBZipCompressed(final byte[] bytes)
143 {
144 return bytes[0] == 'B' && bytes[1] == 'Z' && (bytes[2] == 'h' || bytes[2] == '0') && Character.isDigit(bytes[3])
145 && bytes[4] == 0x31 && bytes[5] == 0x41 && bytes[6] == 0x59 && bytes[7] == 0x26 && bytes[8] == 0x53
146 && bytes[9] == 0x59;
147 }
148
149 /**
150 * Determine if bytes match a ZIP archive signature. Derived from <a href=
151 * "https://en.wikipedia.org/wiki/List_of_file_signatures">https://en.wikipedia.org/wiki/List_of_file_signatures</a>.
152 * @param bytes byte[]; at least 4 bytes from the start of the stream to determine compression type.
153 * @return boolean; true if bytes indicates the start of a ZIP archive; false otherwise
154 */
155 private static boolean isZipCompressed(final byte[] bytes)
156 {
157 if (bytes[0] != 0x50 || bytes[1] != 0x4b)
158 {
159 return false;
160 }
161 return 0x03 == bytes[2] && 0x04 == bytes[3] || 0x05 == bytes[2] && 0x06 == bytes[3]
162 || 0x07 == bytes[2] && 0x08 == bytes[3];
163 }
164
165 /**
166 * Container for a ZipFile that implements Readable and closes the contained ZipFile on close.
167 */
168 static class ZipInputStream extends InputStream implements Closeable
169 {
170 /** The ZipFile that needs to be closed when the input stream is closed. */
171 private final ZipFile zipFile;
172
173 /** The input stream. */
174 private final InputStream inputStream;
175
176 /**
177 * Construct a new ZipInputStream.
178 * @param zipFile ZipFile; the opened ZIP file
179 * @param inputStream InputStream; input stream of (the first) entry in the ZIP file
180 */
181 ZipInputStream(final ZipFile zipFile, final InputStream inputStream)
182 {
183 this.inputStream = inputStream;
184 this.zipFile = zipFile;
185 }
186
187 /**
188 * Close down the reader and release all resources.
189 * @throws IOException when closing the reader fails
190 */
191 @Override
192 public void close() throws IOException
193 {
194 super.close();
195 this.zipFile.close();
196 }
197
198 /** {@inheritDoc} */
199 @Override
200 public int read() throws IOException
201 {
202 return this.inputStream.read();
203 }
204
205 /** {@inheritDoc} */
206 @Override
207 public String toString()
208 {
209 return "ZipInputStream [zipFile=" + this.zipFile + ", inputStream=" + this.inputStream + "]";
210 }
211
212 }
213
214 }