001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.utils.IOUtils;
037
038import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
039import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
040import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
042
043/**
044 * Implements an input stream that can read Zip archives.
045 *
046 * <p>As of Apache Commons Compress it transparently supports Zip64
047 * extensions and thus individual entries and archives larger than 4
048 * GB or with more than 65536 entries.</p>
049 *
050 * <p>The {@link ZipFile} class is preferred when reading from files
051 * as {@link ZipArchiveInputStream} is limited by not being able to
052 * read the central directory header before returning entries.  In
053 * particular {@link ZipArchiveInputStream}</p>
054 *
055 * <ul>
056 *
057 *  <li>may return entries that are not part of the central directory
058 *  at all and shouldn't be considered part of the archive.</li>
059 *
060 *  <li>may return several entries with the same name.</li>
061 *
062 *  <li>will not return internal or external attributes.</li>
063 *
064 *  <li>may return incomplete extra field data.</li>
065 *
066 *  <li>may return unknown sizes and CRC values for entries until the
067 *  next entry has been reached if the archive uses the data
068 *  descriptor feature.</li>
069 *
070 * </ul>
071 *
072 * @see ZipFile
073 * @NotThreadSafe
074 */
075public class ZipArchiveInputStream extends ArchiveInputStream {
076
    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests); package-private on purpose
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries (created with nowrap = true). */
    private final Inflater inf = new Inflater(true);

    /**
     * Buffer used to read from the wrapped stream.  Its limit marks
     * the end of the valid data it currently holds.
     */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read, null if there is none. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Length in bytes of the fixed part of a local file header (sum of the fields listed below). */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length in bytes of the fixed part of a central file header (sum of the fields listed below). */
    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    // One more than ZIP64_MAGIC; going by its name this is 2^32 - confirm
    // against ZipConstants.  Used to correct the Inflater's byte counter.
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] LFH_BUF = new byte[LFH_LEN];
    private final byte[] SKIP_BUF = new byte[1024];
    private final byte[] SHORT_BUF = new byte[SHORT];
    private final byte[] WORD_BUF = new byte[WORD];
    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

    // number of local file headers that have been parsed successfully so far
    private int entriesRead = 0;
160
161    /**
162     * Create an instance using UTF-8 encoding
163     * @param inputStream the stream to wrap
164     */
165    public ZipArchiveInputStream(InputStream inputStream) {
166        this(inputStream, ZipEncodingHelper.UTF8);
167    }
168
169    /**
170     * Create an instance using the specified encoding
171     * @param inputStream the stream to wrap
172     * @param encoding the encoding to use for file names, use null
173     * for the platform's default encoding
174     * @since 1.5
175     */
176    public ZipArchiveInputStream(InputStream inputStream, String encoding) {
177        this(inputStream, encoding, true);
178    }
179
180    /**
181     * Create an instance using the specified encoding
182     * @param inputStream the stream to wrap
183     * @param encoding the encoding to use for file names, use null
184     * for the platform's default encoding
185     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
186     * Extra Fields (if present) to set the file names.
187     */
188    public ZipArchiveInputStream(InputStream inputStream, String encoding, boolean useUnicodeExtraFields) {
189        this(inputStream, encoding, useUnicodeExtraFields, false);
190    }
191
192    /**
193     * Create an instance using the specified encoding
194     * @param inputStream the stream to wrap
195     * @param encoding the encoding to use for file names, use null
196     * for the platform's default encoding
197     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
198     * Extra Fields (if present) to set the file names.
199     * @param allowStoredEntriesWithDataDescriptor whether the stream
200     * will try to read STORED entries that use a data descriptor
201     * @since 1.1
202     */
203    public ZipArchiveInputStream(InputStream inputStream,
204                                 String encoding,
205                                 boolean useUnicodeExtraFields,
206                                 boolean allowStoredEntriesWithDataDescriptor) {
207        this.encoding = encoding;
208        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
209        this.useUnicodeExtraFields = useUnicodeExtraFields;
210        in = new PushbackInputStream(inputStream, buf.capacity());
211        this.allowStoredEntriesWithDataDescriptor =
212            allowStoredEntriesWithDataDescriptor;
213        // haven't read anything so far
214        buf.limit(0);
215    }
216
217    public ZipArchiveEntry getNextZipEntry() throws IOException {
218        boolean firstEntry = true;
219        if (closed || hitCentralDirectory) {
220            return null;
221        }
222        if (current != null) {
223            closeEntry();
224            firstEntry = false;
225        }
226
227        try {
228            if (firstEntry) {
229                // split archives have a special signature before the
230                // first local file header - look for it and fail with
231                // the appropriate error message if this is a split
232                // archive.
233                readFirstLocalFileHeader(LFH_BUF);
234            } else {
235                readFully(LFH_BUF);
236            }
237        } catch (EOFException e) {
238            return null;
239        }
240
241        ZipLong sig = new ZipLong(LFH_BUF);
242        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
243            hitCentralDirectory = true;
244            skipRemainderOfArchive();
245        }
246        if (!sig.equals(ZipLong.LFH_SIG)) {
247            return null;
248        }
249
250        int off = WORD;
251        current = new CurrentEntry();
252
253        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
254        off += SHORT;
255        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
256
257        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
258        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
259        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
260        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
261        current.entry.setGeneralPurposeBit(gpFlag);
262
263        off += SHORT;
264
265        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
266        off += SHORT;
267
268        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
269        current.entry.setTime(time);
270        off += WORD;
271
272        ZipLong size = null, cSize = null;
273        if (!current.hasDataDescriptor) {
274            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
275            off += WORD;
276
277            cSize = new ZipLong(LFH_BUF, off);
278            off += WORD;
279
280            size = new ZipLong(LFH_BUF, off);
281            off += WORD;
282        } else {
283            off += 3 * WORD;
284        }
285
286        int fileNameLen = ZipShort.getValue(LFH_BUF, off);
287
288        off += SHORT;
289
290        int extraLen = ZipShort.getValue(LFH_BUF, off);
291        off += SHORT;
292
293        byte[] fileName = new byte[fileNameLen];
294        readFully(fileName);
295        current.entry.setName(entryEncoding.decode(fileName), fileName);
296
297        byte[] extraData = new byte[extraLen];
298        readFully(extraData);
299        current.entry.setExtra(extraData);
300
301        if (!hasUTF8Flag && useUnicodeExtraFields) {
302            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
303        }
304
305        processZip64Extra(size, cSize);
306
307        if (current.entry.getCompressedSize() != ZipArchiveEntry.SIZE_UNKNOWN) {
308            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
309                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
310            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
311                current.in = new ExplodingInputStream(
312                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
313                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
314                        new BoundedInputStream(in, current.entry.getCompressedSize()));
315            }
316        }
317        
318        entriesRead++;
319        return current.entry;
320    }
321
322    /**
323     * Fills the given array with the first local file header and
324     * deals with splitting/spanning markers that may prefix the first
325     * LFH.
326     */
327    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
328        readFully(lfh);
329        ZipLong sig = new ZipLong(lfh);
330        if (sig.equals(ZipLong.DD_SIG)) {
331            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
332        }
333
334        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
335            // The archive is not really split as only one segment was
336            // needed in the end.  Just skip over the marker.
337            byte[] missedLfhBytes = new byte[4];
338            readFully(missedLfhBytes);
339            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
340            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
341        }
342    }
343
344    /**
345     * Records whether a Zip64 extra is present and sets the size
346     * information from it if sizes are 0xFFFFFFFF and the entry
347     * doesn't use a data descriptor.
348     */
349    private void processZip64Extra(ZipLong size, ZipLong cSize) {
350        Zip64ExtendedInformationExtraField z64 =
351            (Zip64ExtendedInformationExtraField) 
352            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
353        current.usesZip64 = z64 != null;
354        if (!current.hasDataDescriptor) {
355            if (z64 != null // same as current.usesZip64 but avoids NPE warning
356                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
357                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
358                current.entry.setSize(z64.getSize().getLongValue());
359            } else {
360                current.entry.setCompressedSize(cSize.getValue());
361                current.entry.setSize(size.getValue());
362            }
363        }
364    }
365
366    @Override
367    public ArchiveEntry getNextEntry() throws IOException {
368        return getNextZipEntry();
369    }
370
371    /**
372     * Whether this class is able to read the given entry.
373     *
374     * <p>May return false if it is set up to use encryption or a
375     * compression method that hasn't been implemented yet.</p>
376     * @since 1.1
377     */
378    @Override
379    public boolean canReadEntryData(ArchiveEntry ae) {
380        if (ae instanceof ZipArchiveEntry) {
381            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
382            return ZipUtil.canHandleEntryData(ze)
383                && supportsDataDescriptorFor(ze);
384
385        }
386        return false;
387    }
388
389    @Override
390    public int read(byte[] buffer, int offset, int length) throws IOException {
391        if (closed) {
392            throw new IOException("The stream is closed");
393        }
394
395        if (current == null) {
396            return -1;
397        }
398
399        // avoid int overflow, check null buffer
400        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
401            throw new ArrayIndexOutOfBoundsException();
402        }
403        
404        ZipUtil.checkRequestedFeatures(current.entry);
405        if (!supportsDataDescriptorFor(current.entry)) {
406            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
407                    current.entry);
408        }
409
410        int read;
411        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
412            read = readStored(buffer, offset, length);
413        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
414            read = readDeflated(buffer, offset, length);
415        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
416                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
417            read = current.in.read(buffer, offset, length);
418        } else {
419            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
420                    current.entry);
421        }
422        
423        if (read >= 0) {
424            current.crc.update(buffer, offset, read);
425        }
426        
427        return read;
428    }
429
430    /**
431     * Implementation of read for STORED entries.
432     */
433    private int readStored(byte[] buffer, int offset, int length) throws IOException {
434
435        if (current.hasDataDescriptor) {
436            if (lastStoredEntry == null) {
437                readStoredEntry();
438            }
439            return lastStoredEntry.read(buffer, offset, length);
440        }
441
442        long csize = current.entry.getSize();
443        if (current.bytesRead >= csize) {
444            return -1;
445        }
446
447        if (buf.position() >= buf.limit()) {
448            buf.position(0);
449            int l = in.read(buf.array());
450            if (l == -1) {
451                return -1;
452            }
453            buf.limit(l);
454
455            count(l);
456            current.bytesReadFromStream += l;
457        }
458
459        int toRead = Math.min(buf.remaining(), length);
460        if ((csize - current.bytesRead) < toRead) {
461            // if it is smaller than toRead then it fits into an int
462            toRead = (int) (csize - current.bytesRead);
463        }
464        buf.get(buffer, offset, toRead);
465        current.bytesRead += toRead;
466        return toRead;
467    }
468
469    /**
470     * Implementation of read for DEFLATED entries.
471     */
472    private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
473        int read = readFromInflater(buffer, offset, length);
474        if (read <= 0) {
475            if (inf.finished()) {
476                return -1;
477            } else if (inf.needsDictionary()) {
478                throw new ZipException("This archive needs a preset dictionary"
479                                       + " which is not supported by Commons"
480                                       + " Compress.");
481            } else if (read == -1) {
482                throw new IOException("Truncated ZIP file");
483            }
484        }
485        return read;
486    }
487
488    /**
489     * Potentially reads more bytes to fill the inflater's buffer and
490     * reads from it.
491     */
492    private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
493        int read = 0;
494        do {
495            if (inf.needsInput()) {
496                int l = fill();
497                if (l > 0) {
498                    current.bytesReadFromStream += buf.limit();
499                } else if (l == -1) {
500                    return -1;
501                } else {
502                    break;
503                }
504            }
505            try {
506                read = inf.inflate(buffer, offset, length);
507            } catch (DataFormatException e) {
508                throw (IOException) new ZipException(e.getMessage()).initCause(e);
509            }
510        } while (read == 0 && inf.needsInput());
511        return read;
512    }
513
514    @Override
515    public void close() throws IOException {
516        if (!closed) {
517            closed = true;
518            in.close();
519            inf.end();
520        }
521    }
522
523    /**
524     * Skips over and discards value bytes of data from this input
525     * stream.
526     *
527     * <p>This implementation may end up skipping over some smaller
528     * number of bytes, possibly 0, if and only if it reaches the end
529     * of the underlying stream.</p>
530     *
531     * <p>The actual number of bytes skipped is returned.</p>
532     *
533     * @param value the number of bytes to be skipped.
534     * @return the actual number of bytes skipped.
535     * @throws IOException - if an I/O error occurs.
536     * @throws IllegalArgumentException - if value is negative.
537     */
538    @Override
539    public long skip(long value) throws IOException {
540        if (value >= 0) {
541            long skipped = 0;
542            while (skipped < value) {
543                long rem = value - skipped;
544                int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
545                if (x == -1) {
546                    return skipped;
547                }
548                skipped += x;
549            }
550            return skipped;
551        }
552        throw new IllegalArgumentException();
553    }
554
555    /**
556     * Checks if the signature matches what is expected for a zip file.
557     * Does not currently handle self-extracting zips which may have arbitrary
558     * leading content.
559     *
560     * @param signature the bytes to check
561     * @param length    the number of bytes to check
562     * @return true, if this stream is a zip archive stream, false otherwise
563     */
564    public static boolean matches(byte[] signature, int length) {
565        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
566            return false;
567        }
568
569        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
570            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
571            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
572            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
573    }
574
575    private static boolean checksig(byte[] signature, byte[] expected) {
576        for (int i = 0; i < expected.length; i++) {
577            if (signature[i] != expected[i]) {
578                return false;
579            }
580        }
581        return true;
582    }
583
584    /**
585     * Closes the current ZIP archive entry and positions the underlying
586     * stream to the beginning of the next entry. All per-entry variables
587     * and data structures are cleared.
588     * <p>
589     * If the compressed size of this entry is included in the entry header,
590     * then any outstanding bytes are simply skipped from the underlying
591     * stream without uncompressing them. This allows an entry to be safely
592     * closed even if the compression method is unsupported.
593     * <p>
594     * In case we don't know the compressed size of this entry or have
595     * already buffered too much data from the underlying stream to support
596     * uncompression, then the uncompression process is completed and the
597     * end position of the stream is adjusted based on the result of that
598     * process.
599     *
600     * @throws IOException if an error occurs
601     */
602    private void closeEntry() throws IOException {
603        if (closed) {
604            throw new IOException("The stream is closed");
605        }
606        if (current == null) {
607            return;
608        }
609
610        // Ensure all entry bytes are read
611        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
612                && !current.hasDataDescriptor) {
613            drainCurrentEntryData();
614        } else {
615            skip(Long.MAX_VALUE);
616
617            long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
618                       ? getBytesInflated() : current.bytesRead;
619
620            // this is at most a single read() operation and can't
621            // exceed the range of int
622            int diff = (int) (current.bytesReadFromStream - inB);
623
624            // Pushback any required bytes
625            if (diff > 0) {
626                pushback(buf.array(), buf.limit() - diff, diff);
627            }
628        }
629
630        if (lastStoredEntry == null && current.hasDataDescriptor) {
631            readDataDescriptor();
632        }
633
634        inf.reset();
635        buf.clear().flip();
636        current = null;
637        lastStoredEntry = null;
638    }
639
640    /**
641     * Read all data of the current entry from the underlying stream
642     * that hasn't been read, yet.
643     */
644    private void drainCurrentEntryData() throws IOException {
645        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
646        while (remaining > 0) {
647            long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
648            if (n < 0) {
649                throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
650            } else {
651                count(n);
652                remaining -= n;
653            }
654        }
655    }
656
657    /**
658     * Get the number of bytes Inflater has actually processed.
659     *
660     * <p>for Java &lt; Java7 the getBytes* methods in
661     * Inflater/Deflater seem to return unsigned ints rather than
662     * longs that start over with 0 at 2^32.</p>
663     *
664     * <p>The stream knows how many bytes it has read, but not how
665     * many the Inflater actually consumed - it should be between the
666     * total number of bytes read for the entry and the total number
667     * minus the last read operation.  Here we just try to make the
668     * value close enough to the bytes we've read by assuming the
669     * number of bytes consumed must be smaller than (or equal to) the
670     * number of bytes read but not smaller by more than 2^32.</p>
671     */
672    private long getBytesInflated() {
673        long inB = inf.getBytesRead();
674        if (current.bytesReadFromStream >= TWO_EXP_32) {
675            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
676                inB += TWO_EXP_32;
677            }
678        }
679        return inB;
680    }
681
682    private int fill() throws IOException {
683        if (closed) {
684            throw new IOException("The stream is closed");
685        }
686        int length = in.read(buf.array());
687        if (length > 0) {
688            buf.limit(length);
689            count(buf.limit());
690            inf.setInput(buf.array(), 0, buf.limit());
691        }
692        return length;
693    }
694
695    private void readFully(byte[] b) throws IOException {
696        int count = IOUtils.readFully(in, b);
697        count(count);
698        if (count < b.length) {
699            throw new EOFException();
700        }
701    }
702
703    private void readDataDescriptor() throws IOException {
704        readFully(WORD_BUF);
705        ZipLong val = new ZipLong(WORD_BUF);
706        if (ZipLong.DD_SIG.equals(val)) {
707            // data descriptor with signature, skip sig
708            readFully(WORD_BUF);
709            val = new ZipLong(WORD_BUF);
710        }
711        current.entry.setCrc(val.getValue());
712
713        // if there is a ZIP64 extra field, sizes are eight bytes
714        // each, otherwise four bytes each.  Unfortunately some
715        // implementations - namely Java7 - use eight bytes without
716        // using a ZIP64 extra field -
717        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
718
719        // just read 16 bytes and check whether bytes nine to twelve
720        // look like one of the signatures of what could follow a data
721        // descriptor (ignoring archive decryption headers for now).
722        // If so, push back eight bytes and assume sizes are four
723        // bytes, otherwise sizes are eight bytes each.
724        readFully(TWO_DWORD_BUF);
725        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
726        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
727            pushback(TWO_DWORD_BUF, DWORD, DWORD);
728            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
729            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
730        } else {
731            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
732            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
733        }
734    }
735
736    /**
737     * Whether this entry requires a data descriptor this library can work with.
738     *
739     * @return true if allowStoredEntriesWithDataDescriptor is true,
740     * the entry doesn't require any data descriptor or the method is
741     * DEFLATED.
742     */
743    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
744        return !entry.getGeneralPurposeBit().usesDataDescriptor()
745
746                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
747                || entry.getMethod() == ZipEntry.DEFLATED;
748    }
749
750    /**
751     * Caches a stored entry that uses the data descriptor.
752     *
753     * <ul>
754     *   <li>Reads a stored entry until the signature of a local file
755     *     header, central directory header or data descriptor has been
756     *     found.</li>
757     *   <li>Stores all entry data in lastStoredEntry.</p>
758     *   <li>Rewinds the stream to position at the data
759     *     descriptor.</li>
760     *   <li>reads the data descriptor</li>
761     * </ul>
762     *
763     * <p>After calling this method the entry should know its size,
764     * the entry's data is cached and the stream is positioned at the
765     * next local file or central directory header.</p>
766     */
767    private void readStoredEntry() throws IOException {
768        ByteArrayOutputStream bos = new ByteArrayOutputStream();
769        int off = 0;
770        boolean done = false;
771
772        // length of DD without signature
773        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
774
775        while (!done) {
776            int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
777            if (r <= 0) {
778                // read the whole archive without ever finding a
779                // central directory
780                throw new IOException("Truncated ZIP file");
781            }
782            if (r + off < 4) {
783                // buffer too small to check for a signature, loop
784                off += r;
785                continue;
786            }
787
788            done = bufferContainsSignature(bos, off, r, ddLen);
789            if (!done) {
790                off = cacheBytesRead(bos, off, r, ddLen);
791            }
792        }
793
794        byte[] b = bos.toByteArray();
795        lastStoredEntry = new ByteArrayInputStream(b);
796    }
797
    // Byte forms of the local file header, central file header and data
    // descriptor signatures; compared byte by byte against the read buffer
    // when looking for the end of a stored entry's data.
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
801
802    /**
803     * Checks whether the current buffer contains the signature of a
804     * &quot;data descriptor&quot;, &quot;local file header&quot; or
805     * &quot;central directory entry&quot;.
806     *
807     * <p>If it contains such a signature, reads the data descriptor
808     * and positions the stream right after the data descriptor.</p>
809     */
810    private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
811            throws IOException {
812
813        boolean done = false;
814        int readTooMuch = 0;
815        for (int i = 0; !done && i < lastRead - 4; i++) {
816            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
817                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
818                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
819                    // found a LFH or CFH:
820                    readTooMuch = offset + lastRead - i - expectedDDLen;
821                    done = true;
822                }
823                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
824                    // found DD:
825                    readTooMuch = offset + lastRead - i;
826                    done = true;
827                }
828                if (done) {
829                    // * push back bytes read in excess as well as the data
830                    //   descriptor
831                    // * copy the remaining bytes to cache
832                    // * read data descriptor
833                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
834                    bos.write(buf.array(), 0, i);
835                    readDataDescriptor();
836                }
837            }
838        }
839        return done;
840    }
841
842    /**
843     * If the last read bytes could hold a data descriptor and an
844     * incomplete signature then save the last bytes to the front of
845     * the buffer and cache everything in front of the potential data
846     * descriptor into the given ByteArrayOutputStream.
847     *
848     * <p>Data descriptor plus incomplete signature (3 bytes in the
849     * worst case) can be 20 bytes max.</p>
850     */
851    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expecteDDLen) {
852        final int cacheable = offset + lastRead - expecteDDLen - 3;
853        if (cacheable > 0) {
854            bos.write(buf.array(), 0, cacheable);
855            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
856            offset = expecteDDLen + 3;
857        } else {
858            offset += lastRead;
859        }
860        return offset;
861    }
862
863    private void pushback(byte[] buf, int offset, int length) throws IOException {
864        ((PushbackInputStream) in).unread(buf, offset, length);
865        pushedBackBytes(length);
866    }
867
868    // End of Central Directory Record
869    //   end of central dir signature    WORD
870    //   number of this disk             SHORT
871    //   number of the disk with the
872    //   start of the central directory  SHORT
873    //   total number of entries in the
874    //   central directory on this disk  SHORT
875    //   total number of entries in
876    //   the central directory           SHORT
877    //   size of the central directory   WORD
878    //   offset of start of central
879    //   directory with respect to
880    //   the starting disk number        WORD
881    //   .ZIP file comment length        SHORT
882    //   .ZIP file comment               up to 64KB
883    //
884
885    /**
886     * Reads the stream until it find the "End of central directory
887     * record" and consumes it as well.
888     */
889    private void skipRemainderOfArchive() throws IOException {
890        // skip over central directory. One LFH has been read too much
891        // already.  The calculation discounts file names and extra
892        // data so it will be too short.
893        realSkip(entriesRead * CFH_LEN - LFH_LEN);
894        findEocdRecord();
895        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
896        readFully(SHORT_BUF);
897        // file comment
898        realSkip(ZipShort.getValue(SHORT_BUF));
899    }
900
901    /**
902     * Reads forward until the signature of the &quot;End of central
903     * directory&quot; record is found.
904     */
905    private void findEocdRecord() throws IOException {
906        int currentByte = -1;
907        boolean skipReadCall = false;
908        while (skipReadCall || (currentByte = readOneByte()) > -1) {
909            skipReadCall = false;
910            if (!isFirstByteOfEocdSig(currentByte)) {
911                continue;
912            }
913            currentByte = readOneByte();
914            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
915                if (currentByte == -1) {
916                    break;
917                }
918                skipReadCall = isFirstByteOfEocdSig(currentByte);
919                continue;
920            }
921            currentByte = readOneByte();
922            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
923                if (currentByte == -1) {
924                    break;
925                }
926                skipReadCall = isFirstByteOfEocdSig(currentByte);
927                continue;
928            }
929            currentByte = readOneByte();
930            if (currentByte == -1
931                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
932                break;
933            }
934            skipReadCall = isFirstByteOfEocdSig(currentByte);
935        }
936    }
937
938    /**
939     * Skips bytes by reading from the underlying stream rather than
940     * the (potentially inflating) archive stream - which {@link
941     * #skip} would do.
942     *
943     * Also updates bytes-read counter.
944     */
945    private void realSkip(long value) throws IOException {
946        if (value >= 0) {
947            long skipped = 0;
948            while (skipped < value) {
949                long rem = value - skipped;
950                int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
951                if (x == -1) {
952                    return;
953                }
954                count(x);
955                skipped += x;
956            }
957            return;
958        }
959        throw new IllegalArgumentException();
960    }
961
962    /**
963     * Reads bytes by reading from the underlying stream rather than
964     * the (potentially inflating) archive stream - which {@link #read} would do.
965     *
966     * Also updates bytes-read counter.
967     */
968    private int readOneByte() throws IOException {
969        int b = in.read();
970        if (b != -1) {
971            count(1);
972        }
973        return b;
974    }
975
976    private boolean isFirstByteOfEocdSig(int b) {
977        return b == ZipArchiveOutputStream.EOCD_SIG[0];
978    }
979
980    /**
981     * Structure collecting information for the entry that is
982     * currently being read.
983     */
984    private static final class CurrentEntry {
985
986        /**
987         * Current ZIP entry.
988         */
989        private final ZipArchiveEntry entry = new ZipArchiveEntry();
990
991        /**
992         * Does the entry use a data descriptor?
993         */
994        private boolean hasDataDescriptor;
995
996        /**
997         * Does the entry have a ZIP64 extended information extra field.
998         */
999        private boolean usesZip64;
1000
1001        /**
1002         * Number of bytes of entry content read by the client if the
1003         * entry is STORED.
1004         */
1005        private long bytesRead;
1006
1007        /**
1008         * Number of bytes of entry content read so from the stream.
1009         *
1010         * <p>This may be more than the actual entry's length as some
1011         * stuff gets buffered up and needs to be pushed back when the
1012         * end of the entry has been reached.</p>
1013         */
1014        private long bytesReadFromStream;
1015
1016        /**
1017         * The checksum calculated as the current entry is read.
1018         */
1019        private final CRC32 crc = new CRC32();
1020
1021        /**
1022         * The input stream decompressing the data for shrunk and imploded entries.
1023         */
1024        private InputStream in;
1025    }
1026
1027    /**
1028     * Bounded input stream adapted from commons-io
1029     */
1030    private class BoundedInputStream extends InputStream {
1031
1032        /** the wrapped input stream */
1033        private final InputStream in;
1034
1035        /** the max length to provide */
1036        private final long max;
1037
1038        /** the number of bytes already returned */
1039        private long pos = 0;
1040    
1041        /**
1042         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1043         * stream and limits it to a certain size.
1044         *
1045         * @param in The wrapped input stream
1046         * @param size The maximum number of bytes to return
1047         */
1048        public BoundedInputStream(final InputStream in, final long size) {
1049            this.max = size;
1050            this.in = in;
1051        }
1052
1053        @Override
1054        public int read() throws IOException {
1055            if (max >= 0 && pos >= max) {
1056                return -1;
1057            }
1058            final int result = in.read();
1059            pos++;
1060            count(1);
1061            current.bytesReadFromStream++;
1062            return result;
1063        }
1064
1065        @Override
1066        public int read(final byte[] b) throws IOException {
1067            return this.read(b, 0, b.length);
1068        }
1069
1070        @Override
1071        public int read(final byte[] b, final int off, final int len) throws IOException {
1072            if (max >= 0 && pos >= max) {
1073                return -1;
1074            }
1075            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1076            final int bytesRead = in.read(b, off, (int) maxRead);
1077
1078            if (bytesRead == -1) {
1079                return -1;
1080            }
1081
1082            pos += bytesRead;
1083            count(bytesRead);
1084            current.bytesReadFromStream += bytesRead;
1085            return bytesRead;
1086        }
1087
1088        @Override
1089        public long skip(final long n) throws IOException {
1090            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1091            final long skippedBytes = in.skip(toSkip);
1092            pos += skippedBytes;
1093            return skippedBytes;
1094        }
1095    
1096        @Override
1097        public int available() throws IOException {
1098            if (max >= 0 && pos >= max) {
1099                return 0;
1100            }
1101            return in.available();
1102        }
1103    }
1104}