Reader.java

  1. package org.opentrafficsim.base.compressedfiles;

  2. import java.io.BufferedInputStream;
  3. import java.io.Closeable;
  4. import java.io.File;
  5. import java.io.FileInputStream;
  6. import java.io.IOException;
  7. import java.io.InputStream;
  8. import java.util.zip.GZIPInputStream;
  9. import java.util.zip.ZipFile;

  10. import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;

  11. /**
  12.  * Reader for compressed files.
  13.  * <p>
  14.  * Copyright (c) 2013-2020 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
  15.  * BSD-style license. See <a href="http://opentrafficsim.org/node/13">OpenTrafficSim License</a>.
  16.  * <p>
  17.  * @version $Revision$, $LastChangedDate$, by $Author$, initial version Oct 25, 2018 <br>
  18.  * @author <a href="http://www.tbm.tudelft.nl/averbraeck">Alexander Verbraeck</a>
  19.  * @author <a href="http://www.tudelft.nl/pknoppers">Peter Knoppers</a>
  20.  * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
  21.  */
  22. public final class Reader
  23. {
  24.     /**
  25.      * Class with only static methods should not be instantiated.
  26.      */
  27.     private Reader()
  28.     {
  29.         // Do not instantiate.
  30.     }

  31.     /**
  32.      * Construct a InputStream for a compressed data file.
  33.      * @param fileName String; the name of the file
  34.      * @param compressionType CompressionType; the expected type of the data compression in the file
  35.      * @return InputStream that can yield the expanded content of the file.
  36.      * @throws IOException when the file could not be read
  37.      */
  38.     public static InputStream createInputStream(final String fileName, final CompressionType compressionType) throws IOException
  39.     {
  40.         CompressionType useCompressionType =
  41.                 CompressionType.AUTODETECT.equals(compressionType) ? autoDetectCompressionType(fileName) : compressionType;
  42.         switch (useCompressionType)
  43.         {
  44.             case AUTODETECT:
  45.                 throw new IOException("Cannot happen");

  46.             case BZIP2:
  47.                 // BUG create with "true" as second argument: see https://issues.apache.org/jira/browse/COMPRESS-224
  48.                 return new BZip2CompressorInputStream(new FileInputStream(fileName), true);

  49.             case GZIP:
  50.                 return new GZIPInputStream(new FileInputStream(fileName));

  51.             case NONE:
  52.                 return new FileInputStream(fileName);

  53.             case ZIP:
  54.             {
  55.                 ZipFile zipFile = new ZipFile(fileName);
  56.                 return new ZipInputStream(zipFile, zipFile.getInputStream(zipFile.entries().nextElement()));
  57.             }

  58.             default:
  59.                 // Cannot happen
  60.                 throw new IOException("Don't know how to create input stream for compression type " + compressionType);

  61.         }
  62.     }

  63.     /**
  64.      * Construct a InputStream for a compressed data file. The type of compression is auto-detected.
  65.      * @param fileName String; the name of the file
  66.      * @return InputStream that can yield the expanded content of the file.
  67.      * @throws IOException when the file can not be opened or read
  68.      */
  69.     public static InputStream createInputStream(final String fileName) throws IOException
  70.     {
  71.         return createInputStream(fileName, CompressionType.AUTODETECT);
  72.     }

  73.     /**
  74.      * Determine the type of compression used in a file.
  75.      * <p>
  76.      * Derived from <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
  77.      * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
  78.      * Gzip inflate an inputStream (if it is indeed gzip compressed), otherwise return an InputStream that yields the same data
  79.      * as the <cite>input</cite> argument.
  80.      * @param fileName String; the name of the file to check
  81.      * @return InputStream yielding the inflated data
  82.      * @throws IOException when errors occur reading the signature bytes
  83.      */
  84.     public static CompressionType autoDetectCompressionType(final String fileName) throws IOException
  85.     {
  86.         final int signatureSize = 10;
  87.         BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(new File(fileName)));
  88.         byte[] signature = new byte[signatureSize];
  89.         bufferedInputStream.read(signature); // read the signature
  90.         bufferedInputStream.close();
  91.         // for (int i = 0; i < signatureSize; i++)
  92.         // {
  93.         // System.err.println("byte " + i + " is " + String.format("%02x", signature[i]));
  94.         // }
  95.         if (isGZipCompressed(signature))
  96.         {
  97.             return CompressionType.GZIP;
  98.         }
  99.         else if (isBZipCompressed(signature))
  100.         {
  101.             return CompressionType.BZIP2;
  102.         }
  103.         else if (isZipCompressed(signature))
  104.         {
  105.             return CompressionType.ZIP;
  106.         }
  107.         return CompressionType.NONE;
  108.     }

  109.     /**
  110.      * Determine if bytes match the GZip compression signature. Derived from
  111.      * <a href="http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped">
  112.      * http://stackoverflow.com/questions/4818468/how-to-check-if-inputstream-is-gzipped</a>. <br>
  113.      * Determines if a byte array is compressed. The java.util.zip GZip implementation does not expose the GZip header so it is
  114.      * difficult to determine if a string is compressed.
  115.      * @param bytes byte[]; at least 2 bytes from the start of the stream to determine compression type
  116.      * @return boolean; true if the data appears to be GZip compressed; false otherwise
  117.      * @throws java.io.IOException if the byte array couldn't be read
  118.      */
  119.     public static boolean isGZipCompressed(final byte[] bytes) throws IOException
  120.     {
  121.         return (bytes[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (bytes[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8));
  122.     }

  123.     /**
  124.      * Determine if bytes match the BZip2 compression signature.
  125.      * @param bytes byte[]; at least 10 bytes from the start of the stream to determine compression type.
  126.      * @return boolean; true if bytes indicates the start of a BZip compressed stream
  127.      */
  128.     private static boolean isBZipCompressed(final byte[] bytes)
  129.     {
  130.         return bytes[0] == 'B' && bytes[1] == 'Z' && (bytes[2] == 'h' || bytes[2] == '0') && Character.isDigit(bytes[3])
  131.                 && bytes[4] == 0x31 && bytes[5] == 0x41 && bytes[6] == 0x59 && bytes[7] == 0x26 && bytes[8] == 0x53
  132.                 && bytes[9] == 0x59;
  133.     }

  134.     /**
  135.      * Determine if bytes match a ZIP archive signature. Derived from <a href=
  136.      * "https://en.wikipedia.org/wiki/List_of_file_signatures">https://en.wikipedia.org/wiki/List_of_file_signatures</a>.
  137.      * @param bytes byte[]; at least 4 bytes from the start of the stream to determine compression type.
  138.      * @return boolean; true if bytes indicates the start of a ZIP archive; false otherwise
  139.      */
  140.     private static boolean isZipCompressed(final byte[] bytes)
  141.     {
  142.         if (bytes[0] != 0x50 || bytes[1] != 0x4b)
  143.         {
  144.             return false;
  145.         }
  146.         return 0x03 == bytes[2] && 0x04 == bytes[3] || 0x05 == bytes[2] && 0x06 == bytes[3]
  147.                 || 0x07 == bytes[2] && 0x08 == bytes[3];
  148.     }

  149.     /**
  150.      * Container for a ZipFile that implements Readable and closes the contained ZipFile on close.
  151.      */
  152.     static class ZipInputStream extends InputStream implements Closeable
  153.     {
  154.         /** The ZipFile that needs to be closed when the input stream is closed. */
  155.         private final ZipFile zipFile;

  156.         /** The input stream. */
  157.         private final InputStream inputStream;

  158.         /**
  159.          * Construct a new ZipInputStream.
  160.          * @param zipFile ZipFile; the opened ZIP file
  161.          * @param inputStream InputStream; input stream of (the first) entry in the ZIP file
  162.          */
  163.         ZipInputStream(final ZipFile zipFile, final InputStream inputStream)
  164.         {
  165.             this.inputStream = inputStream;
  166.             this.zipFile = zipFile;
  167.         }

  168.         /**
  169.          * Close down the reader and release all resources.
  170.          * @throws IOException when closing the reader fails
  171.          */
  172.         @Override
  173.         public void close() throws IOException
  174.         {
  175.             super.close();
  176.             this.zipFile.close();
  177.         }

  178.         /** {@inheritDoc} */
  179.         @Override
  180.         public int read() throws IOException
  181.         {
  182.             return this.inputStream.read();
  183.         }

  184.         /** {@inheritDoc} */
  185.         @Override
  186.         public String toString()
  187.         {
  188.             return "ZipInputStream [zipFile=" + this.zipFile + ", inputStream=" + this.inputStream + "]";
  189.         }

  190.     }

  191. }