001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.BitInputStream;
027
028/**
029 * <p>Generic LZW implementation. It is used internally for
030 * the Z decompressor and the Unshrinking Zip file compression method,
031 * but may be useful for third-party projects in implementing their own LZW variations.</p>
032 *
033 * @NotThreadSafe
034 * @since 1.10
035 */
036public abstract class LZWInputStream extends CompressorInputStream {
037    protected static final int DEFAULT_CODE_SIZE = 9;
038    protected static final int UNUSED_PREFIX = -1;
039
040    private final byte[] oneByte = new byte[1];
041
042    protected final BitInputStream in;
043    private int clearCode = -1;
044    private int codeSize = DEFAULT_CODE_SIZE;
045    private byte previousCodeFirstChar;
046    private int previousCode = UNUSED_PREFIX;
047    private int tableSize;
048    private int[] prefixes;
049    private byte[] characters;
050    private byte[] outputStack;
051    private int outputStackLocation;
052
053    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
054        this.in = new BitInputStream(inputStream, byteOrder);
055    }
056
057    @Override
058    public void close() throws IOException {
059        in.close();
060    }
061    
062    @Override
063    public int read() throws IOException {
064        int ret = read(oneByte);
065        if (ret < 0) {
066            return ret;
067        }
068        return 0xff & oneByte[0];
069    }
070    
071    @Override
072    public int read(byte[] b, int off, int len) throws IOException {
073        int bytesRead = readFromStack(b, off, len);
074        while (len - bytesRead > 0) {
075            int result = decompressNextSymbol();
076            if (result < 0) {
077                if (bytesRead > 0) {
078                    count(bytesRead);
079                    return bytesRead;
080                }
081                return result;
082            }
083            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
084        }
085        count(bytesRead);
086        return bytesRead;
087    }
088
089    /**
090     * Read the next code and expand it.
091     */
092    protected abstract int decompressNextSymbol() throws IOException;
093
094    /**
095     * Add a new entry to the dictionary.
096     */
097    protected abstract int addEntry(int previousCode, byte character)
098        throws IOException;
099
100    /**
101     * Sets the clear code based on the code size.
102     */
103    protected void setClearCode(int codeSize) {
104        clearCode = (1 << (codeSize - 1));
105    }
106
107    /**
108     * Initializes the arrays based on the maximum code size.
109     */
110    protected void initializeTables(int maxCodeSize) {
111        final int maxTableSize = 1 << maxCodeSize;
112        prefixes = new int[maxTableSize];
113        characters = new byte[maxTableSize];
114        outputStack = new byte[maxTableSize];
115        outputStackLocation = maxTableSize;
116        final int max = 1 << 8;
117        for (int i = 0; i < max; i++) {
118            prefixes[i] = -1;
119            characters[i] = (byte) i;
120        }
121    }
122
123    /**
124     * Reads the next code from the stream.
125     */
126    protected int readNextCode() throws IOException {
127        if (codeSize > 31) {
128            throw new IllegalArgumentException("code size must not be bigger than 31");
129        }
130        return (int) in.readBits(codeSize);
131    }
132    
133    /**
134     * Adds a new entry if the maximum table size hasn't been exceeded
135     * and returns the new index.
136     */
137    protected int addEntry(int previousCode, byte character, int maxTableSize) {
138        if (tableSize < maxTableSize) {
139            prefixes[tableSize] = previousCode;
140            characters[tableSize] = character;
141            return tableSize++;
142        }
143        return -1;
144    }
145
146    /**
147     * Add entry for repeat of previousCode we haven't added, yet.
148     */
149    protected int addRepeatOfPreviousCode() throws IOException {
150        if (previousCode == -1) {
151            // can't have a repeat for the very first code
152            throw new IOException("The first code can't be a reference to its preceding code");
153        }
154        return addEntry(previousCode, previousCodeFirstChar);
155    }
156
157    /**
158     * Expands the entry with index code to the output stack and may
159     * create a new entry
160     */
161    protected int expandCodeToOutputStack(int code, boolean addedUnfinishedEntry)
162        throws IOException {
163        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
164            outputStack[--outputStackLocation] = characters[entry];
165        }
166        if (previousCode != -1 && !addedUnfinishedEntry) {
167            addEntry(previousCode, outputStack[outputStackLocation]);
168        }
169        previousCode = code;
170        previousCodeFirstChar = outputStack[outputStackLocation];
171        return outputStackLocation;
172    }
173
174    private int readFromStack(byte[] b, int off, int len) {
175        int remainingInStack = outputStack.length - outputStackLocation;
176        if (remainingInStack > 0) {
177            int maxLength = Math.min(remainingInStack, len);
178            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
179            outputStackLocation += maxLength;
180            return maxLength;
181        }
182        return 0;
183    }
184
185    protected int getCodeSize() {
186        return codeSize;
187    }
188
189    protected void resetCodeSize() {
190        setCodeSize(DEFAULT_CODE_SIZE);
191    }
192
193    protected void setCodeSize(int cs) {
194        this.codeSize = cs;
195    }
196
197    protected void incrementCodeSize() {
198        codeSize++;
199    }
200
201    protected void resetPreviousCode() {
202        this.previousCode = -1;
203    }
204
205    protected int getPrefix(int offset) {
206        return prefixes[offset];
207    }
208
209    protected void setPrefix(int offset, int value) {
210        prefixes[offset] = value;
211    }
212
213    protected int getPrefixesLength() {
214        return prefixes.length;
215    }
216
217    protected int getClearCode() {
218        return clearCode;
219    }
220
221    protected int getTableSize() {
222        return tableSize;
223    }
224
225    protected void setTableSize(int newSize) {
226        tableSize = newSize;
227    }
228
229}