001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020/* 021 * This package is based on the work done by Keiron Liddle, Aftex Software 022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his 023 * great code. 024 */ 025package org.apache.commons.compress.compressors.bzip2; 026 027import java.io.IOException; 028import java.io.InputStream; 029 030import org.apache.commons.compress.compressors.CompressorInputStream; 031 032/** 033 * An input stream that decompresses from the BZip2 format to be read as any other stream. 034 * 035 * @NotThreadSafe 036 */ 037public class BZip2CompressorInputStream extends CompressorInputStream implements 038 BZip2Constants { 039 040 /** 041 * Index of the last char in the block, so the block size == last + 1. 042 */ 043 private int last; 044 045 /** 046 * Index in zptr[] of original string after sorting. 047 */ 048 private int origPtr; 049 050 /** 051 * always: in the range 0 .. 9. The current block size is 100000 * this 052 * number. 053 */ 054 private int blockSize100k; 055 056 private boolean blockRandomised; 057 058 private int bsBuff; 059 private int bsLive; 060 private final CRC crc = new CRC(); 061 062 private int nInUse; 063 064 private InputStream in; 065 private final boolean decompressConcatenated; 066 067 private static final int EOF = 0; 068 private static final int START_BLOCK_STATE = 1; 069 private static final int RAND_PART_A_STATE = 2; 070 private static final int RAND_PART_B_STATE = 3; 071 private static final int RAND_PART_C_STATE = 4; 072 private static final int NO_RAND_PART_A_STATE = 5; 073 private static final int NO_RAND_PART_B_STATE = 6; 074 private static final int NO_RAND_PART_C_STATE = 7; 075 076 private int currentState = START_BLOCK_STATE; 077 078 private int storedBlockCRC, storedCombinedCRC; 079 private int computedBlockCRC, computedCombinedCRC; 080 081 // Variables used by setup* methods exclusively 082 083 private int su_count; 084 private int su_ch2; 085 private int su_chPrev; 086 private int su_i2; 087 private int su_j2; 088 private int su_rNToGo; 089 private int su_rTPos; 090 private int su_tPos; 091 private char su_z; 092 093 /** 094 * All memory intensive stuff. This field is initialized by initBlock(). 095 */ 096 private BZip2CompressorInputStream.Data data; 097 098 /** 099 * Constructs a new BZip2CompressorInputStream which decompresses bytes 100 * read from the specified stream. This doesn't suppprt decompressing 101 * concatenated .bz2 files. 102 * 103 * @param in the InputStream from which this object should be created 104 * @throws IOException 105 * if the stream content is malformed or an I/O error occurs. 106 * @throws NullPointerException 107 * if {@code in == null} 108 */ 109 public BZip2CompressorInputStream(final InputStream in) throws IOException { 110 this(in, false); 111 } 112 113 /** 114 * Constructs a new BZip2CompressorInputStream which decompresses bytes 115 * read from the specified stream. 116 * 117 * @param in the InputStream from which this object should be created 118 * @param decompressConcatenated 119 * if true, decompress until the end of the input; 120 * if false, stop after the first .bz2 stream and 121 * leave the input position to point to the next 122 * byte after the .bz2 stream 123 * 124 * @throws IOException 125 * if the stream content is malformed or an I/O error occurs. 126 * @throws NullPointerException 127 * if {@code in == null} 128 */ 129 public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 130 this.in = in; 131 this.decompressConcatenated = decompressConcatenated; 132 133 init(true); 134 initBlock(); 135 } 136 137 @Override 138 public int read() throws IOException { 139 if (this.in != null) { 140 int r = read0(); 141 count(r < 0 ? -1 : 1); 142 return r; 143 } else { 144 throw new IOException("stream closed"); 145 } 146 } 147 148 /* 149 * (non-Javadoc) 150 * 151 * @see java.io.InputStream#read(byte[], int, int) 152 */ 153 @Override 154 public int read(final byte[] dest, final int offs, final int len) 155 throws IOException { 156 if (offs < 0) { 157 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 158 } 159 if (len < 0) { 160 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 161 } 162 if (offs + len > dest.length) { 163 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 164 + len + ") > dest.length(" + dest.length + ")."); 165 } 166 if (this.in == null) { 167 throw new IOException("stream closed"); 168 } 169 if (len == 0) { 170 return 0; 171 } 172 173 final int hi = offs + len; 174 int destOffs = offs; 175 int b; 176 while (destOffs < hi && ((b = read0()) >= 0)) { 177 dest[destOffs++] = (byte) b; 178 count(1); 179 } 180 181 int c = (destOffs == offs) ? -1 : (destOffs - offs); 182 return c; 183 } 184 185 private void makeMaps() { 186 final boolean[] inUse = this.data.inUse; 187 final byte[] seqToUnseq = this.data.seqToUnseq; 188 189 int nInUseShadow = 0; 190 191 for (int i = 0; i < 256; i++) { 192 if (inUse[i]) { 193 seqToUnseq[nInUseShadow++] = (byte) i; 194 } 195 } 196 197 this.nInUse = nInUseShadow; 198 } 199 200 private int read0() throws IOException { 201 switch (currentState) { 202 case EOF: 203 return -1; 204 205 case START_BLOCK_STATE: 206 return setupBlock(); 207 208 case RAND_PART_A_STATE: 209 throw new IllegalStateException(); 210 211 case RAND_PART_B_STATE: 212 return setupRandPartB(); 213 214 case RAND_PART_C_STATE: 215 return setupRandPartC(); 216 217 case NO_RAND_PART_A_STATE: 218 throw new IllegalStateException(); 219 220 case NO_RAND_PART_B_STATE: 221 return setupNoRandPartB(); 222 223 case NO_RAND_PART_C_STATE: 224 return setupNoRandPartC(); 225 226 default: 227 throw new IllegalStateException(); 228 } 229 } 230 231 private boolean init(boolean isFirstStream) throws IOException { 232 if (null == in) { 233 throw new IOException("No InputStream"); 234 } 235 236 int magic0 = this.in.read(); 237 if (magic0 == -1 && !isFirstStream) { 238 return false; 239 } 240 int magic1 = this.in.read(); 241 int magic2 = this.in.read(); 242 243 if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') { 244 throw new IOException(isFirstStream 245 ? "Stream is not in the BZip2 format" 246 : "Garbage after a valid BZip2 stream"); 247 } 248 249 int blockSize = this.in.read(); 250 if ((blockSize < '1') || (blockSize > '9')) { 251 throw new IOException("BZip2 block size is invalid"); 252 } 253 254 this.blockSize100k = blockSize - '0'; 255 256 this.bsLive = 0; 257 this.computedCombinedCRC = 0; 258 259 return true; 260 } 261 262 private void initBlock() throws IOException { 263 char magic0; 264 char magic1; 265 char magic2; 266 char magic3; 267 char magic4; 268 char magic5; 269 270 while (true) { 271 // Get the block magic bytes. 272 magic0 = bsGetUByte(); 273 magic1 = bsGetUByte(); 274 magic2 = bsGetUByte(); 275 magic3 = bsGetUByte(); 276 magic4 = bsGetUByte(); 277 magic5 = bsGetUByte(); 278 279 // If isn't end of stream magic, break out of the loop. 280 if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 281 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) { 282 break; 283 } 284 285 // End of stream was reached. Check the combined CRC and 286 // advance to the next .bz2 stream if decoding concatenated 287 // streams. 288 if (complete()) { 289 return; 290 } 291 } 292 293 if (magic0 != 0x31 || // '1' 294 magic1 != 0x41 || // ')' 295 magic2 != 0x59 || // 'Y' 296 magic3 != 0x26 || // '&' 297 magic4 != 0x53 || // 'S' 298 magic5 != 0x59 // 'Y' 299 ) { 300 this.currentState = EOF; 301 throw new IOException("bad block header"); 302 } else { 303 this.storedBlockCRC = bsGetInt(); 304 this.blockRandomised = bsR(1) == 1; 305 306 /** 307 * Allocate data here instead in constructor, so we do not allocate 308 * it if the input file is empty. 309 */ 310 if (this.data == null) { 311 this.data = new Data(this.blockSize100k); 312 } 313 314 // currBlockNo++; 315 getAndMoveToFrontDecode(); 316 317 this.crc.initialiseCRC(); 318 this.currentState = START_BLOCK_STATE; 319 } 320 } 321 322 private void endBlock() throws IOException { 323 this.computedBlockCRC = this.crc.getFinalCRC(); 324 325 // A bad CRC is considered a fatal error. 326 if (this.storedBlockCRC != this.computedBlockCRC) { 327 // make next blocks readable without error 328 // (repair feature, not yet documented, not tested) 329 this.computedCombinedCRC = (this.storedCombinedCRC << 1) 330 | (this.storedCombinedCRC >>> 31); 331 this.computedCombinedCRC ^= this.storedBlockCRC; 332 333 throw new IOException("BZip2 CRC error"); 334 } 335 336 this.computedCombinedCRC = (this.computedCombinedCRC << 1) 337 | (this.computedCombinedCRC >>> 31); 338 this.computedCombinedCRC ^= this.computedBlockCRC; 339 } 340 341 private boolean complete() throws IOException { 342 this.storedCombinedCRC = bsGetInt(); 343 this.currentState = EOF; 344 this.data = null; 345 346 if (this.storedCombinedCRC != this.computedCombinedCRC) { 347 throw new IOException("BZip2 CRC error"); 348 } 349 350 // Look for the next .bz2 stream if decompressing 351 // concatenated files. 352 return !decompressConcatenated || !init(false); 353 } 354 355 @Override 356 public void close() throws IOException { 357 InputStream inShadow = this.in; 358 if (inShadow != null) { 359 try { 360 if (inShadow != System.in) { 361 inShadow.close(); 362 } 363 } finally { 364 this.data = null; 365 this.in = null; 366 } 367 } 368 } 369 370 private int bsR(final int n) throws IOException { 371 int bsLiveShadow = this.bsLive; 372 int bsBuffShadow = this.bsBuff; 373 374 if (bsLiveShadow < n) { 375 final InputStream inShadow = this.in; 376 do { 377 int thech = inShadow.read(); 378 379 if (thech < 0) { 380 throw new IOException("unexpected end of stream"); 381 } 382 383 bsBuffShadow = (bsBuffShadow << 8) | thech; 384 bsLiveShadow += 8; 385 } while (bsLiveShadow < n); 386 387 this.bsBuff = bsBuffShadow; 388 } 389 390 this.bsLive = bsLiveShadow - n; 391 return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1); 392 } 393 394 private boolean bsGetBit() throws IOException { 395 int bsLiveShadow = this.bsLive; 396 int bsBuffShadow = this.bsBuff; 397 398 if (bsLiveShadow < 1) { 399 int thech = this.in.read(); 400 401 if (thech < 0) { 402 throw new IOException("unexpected end of stream"); 403 } 404 405 bsBuffShadow = (bsBuffShadow << 8) | thech; 406 bsLiveShadow += 8; 407 this.bsBuff = bsBuffShadow; 408 } 409 410 this.bsLive = bsLiveShadow - 1; 411 return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; 412 } 413 414 private char bsGetUByte() throws IOException { 415 return (char) bsR(8); 416 } 417 418 private int bsGetInt() throws IOException { 419 return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); 420 } 421 422 /** 423 * Called by createHuffmanDecodingTables() exclusively. 424 */ 425 private static void hbCreateDecodeTables(final int[] limit, 426 final int[] base, final int[] perm, final char[] length, 427 final int minLen, final int maxLen, final int alphaSize) { 428 for (int i = minLen, pp = 0; i <= maxLen; i++) { 429 for (int j = 0; j < alphaSize; j++) { 430 if (length[j] == i) { 431 perm[pp++] = j; 432 } 433 } 434 } 435 436 for (int i = MAX_CODE_LEN; --i > 0;) { 437 base[i] = 0; 438 limit[i] = 0; 439 } 440 441 for (int i = 0; i < alphaSize; i++) { 442 base[length[i] + 1]++; 443 } 444 445 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { 446 b += base[i]; 447 base[i] = b; 448 } 449 450 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { 451 final int nb = base[i + 1]; 452 vec += nb - b; 453 b = nb; 454 limit[i] = vec - 1; 455 vec <<= 1; 456 } 457 458 for (int i = minLen + 1; i <= maxLen; i++) { 459 base[i] = ((limit[i - 1] + 1) << 1) - base[i]; 460 } 461 } 462 463 private void recvDecodingTables() throws IOException { 464 final Data dataShadow = this.data; 465 final boolean[] inUse = dataShadow.inUse; 466 final byte[] pos = dataShadow.recvDecodingTables_pos; 467 final byte[] selector = dataShadow.selector; 468 final byte[] selectorMtf = dataShadow.selectorMtf; 469 470 int inUse16 = 0; 471 472 /* Receive the mapping table */ 473 for (int i = 0; i < 16; i++) { 474 if (bsGetBit()) { 475 inUse16 |= 1 << i; 476 } 477 } 478 479 for (int i = 256; --i >= 0;) { 480 inUse[i] = false; 481 } 482 483 for (int i = 0; i < 16; i++) { 484 if ((inUse16 & (1 << i)) != 0) { 485 final int i16 = i << 4; 486 for (int j = 0; j < 16; j++) { 487 if (bsGetBit()) { 488 inUse[i16 + j] = true; 489 } 490 } 491 } 492 } 493 494 makeMaps(); 495 final int alphaSize = this.nInUse + 2; 496 497 /* Now the selectors */ 498 final int nGroups = bsR(3); 499 final int nSelectors = bsR(15); 500 501 for (int i = 0; i < nSelectors; i++) { 502 int j = 0; 503 while (bsGetBit()) { 504 j++; 505 } 506 selectorMtf[i] = (byte) j; 507 } 508 509 /* Undo the MTF values for the selectors. */ 510 for (int v = nGroups; --v >= 0;) { 511 pos[v] = (byte) v; 512 } 513 514 for (int i = 0; i < nSelectors; i++) { 515 int v = selectorMtf[i] & 0xff; 516 final byte tmp = pos[v]; 517 while (v > 0) { 518 // nearly all times v is zero, 4 in most other cases 519 pos[v] = pos[v - 1]; 520 v--; 521 } 522 pos[0] = tmp; 523 selector[i] = tmp; 524 } 525 526 final char[][] len = dataShadow.temp_charArray2d; 527 528 /* Now the coding tables */ 529 for (int t = 0; t < nGroups; t++) { 530 int curr = bsR(5); 531 final char[] len_t = len[t]; 532 for (int i = 0; i < alphaSize; i++) { 533 while (bsGetBit()) { 534 curr += bsGetBit() ? -1 : 1; 535 } 536 len_t[i] = (char) curr; 537 } 538 } 539 540 // finally create the Huffman tables 541 createHuffmanDecodingTables(alphaSize, nGroups); 542 } 543 544 /** 545 * Called by recvDecodingTables() exclusively. 546 */ 547 private void createHuffmanDecodingTables(final int alphaSize, 548 final int nGroups) { 549 final Data dataShadow = this.data; 550 final char[][] len = dataShadow.temp_charArray2d; 551 final int[] minLens = dataShadow.minLens; 552 final int[][] limit = dataShadow.limit; 553 final int[][] base = dataShadow.base; 554 final int[][] perm = dataShadow.perm; 555 556 for (int t = 0; t < nGroups; t++) { 557 int minLen = 32; 558 int maxLen = 0; 559 final char[] len_t = len[t]; 560 for (int i = alphaSize; --i >= 0;) { 561 final char lent = len_t[i]; 562 if (lent > maxLen) { 563 maxLen = lent; 564 } 565 if (lent < minLen) { 566 minLen = lent; 567 } 568 } 569 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, 570 maxLen, alphaSize); 571 minLens[t] = minLen; 572 } 573 } 574 575 private void getAndMoveToFrontDecode() throws IOException { 576 this.origPtr = bsR(24); 577 recvDecodingTables(); 578 579 final InputStream inShadow = this.in; 580 final Data dataShadow = this.data; 581 final byte[] ll8 = dataShadow.ll8; 582 final int[] unzftab = dataShadow.unzftab; 583 final byte[] selector = dataShadow.selector; 584 final byte[] seqToUnseq = dataShadow.seqToUnseq; 585 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; 586 final int[] minLens = dataShadow.minLens; 587 final int[][] limit = dataShadow.limit; 588 final int[][] base = dataShadow.base; 589 final int[][] perm = dataShadow.perm; 590 final int limitLast = this.blockSize100k * 100000; 591 592 /* 593 * Setting up the unzftab entries here is not strictly necessary, but it 594 * does save having to do it later in a separate pass, and so saves a 595 * block's worth of cache misses. 596 */ 597 for (int i = 256; --i >= 0;) { 598 yy[i] = (char) i; 599 unzftab[i] = 0; 600 } 601 602 int groupNo = 0; 603 int groupPos = G_SIZE - 1; 604 final int eob = this.nInUse + 1; 605 int nextSym = getAndMoveToFrontDecode0(0); 606 int bsBuffShadow = this.bsBuff; 607 int bsLiveShadow = this.bsLive; 608 int lastShadow = -1; 609 int zt = selector[groupNo] & 0xff; 610 int[] base_zt = base[zt]; 611 int[] limit_zt = limit[zt]; 612 int[] perm_zt = perm[zt]; 613 int minLens_zt = minLens[zt]; 614 615 while (nextSym != eob) { 616 if ((nextSym == RUNA) || (nextSym == RUNB)) { 617 int s = -1; 618 619 for (int n = 1; true; n <<= 1) { 620 if (nextSym == RUNA) { 621 s += n; 622 } else if (nextSym == RUNB) { 623 s += n << 1; 624 } else { 625 break; 626 } 627 628 if (groupPos == 0) { 629 groupPos = G_SIZE - 1; 630 zt = selector[++groupNo] & 0xff; 631 base_zt = base[zt]; 632 limit_zt = limit[zt]; 633 perm_zt = perm[zt]; 634 minLens_zt = minLens[zt]; 635 } else { 636 groupPos--; 637 } 638 639 int zn = minLens_zt; 640 641 // Inlined: 642 // int zvec = bsR(zn); 643 while (bsLiveShadow < zn) { 644 final int thech = inShadow.read(); 645 if (thech >= 0) { 646 bsBuffShadow = (bsBuffShadow << 8) | thech; 647 bsLiveShadow += 8; 648 continue; 649 } else { 650 throw new IOException("unexpected end of stream"); 651 } 652 } 653 int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) 654 & ((1 << zn) - 1); 655 bsLiveShadow -= zn; 656 657 while (zvec > limit_zt[zn]) { 658 zn++; 659 while (bsLiveShadow < 1) { 660 final int thech = inShadow.read(); 661 if (thech >= 0) { 662 bsBuffShadow = (bsBuffShadow << 8) | thech; 663 bsLiveShadow += 8; 664 continue; 665 } else { 666 throw new IOException( 667 "unexpected end of stream"); 668 } 669 } 670 bsLiveShadow--; 671 zvec = (zvec << 1) 672 | ((bsBuffShadow >> bsLiveShadow) & 1); 673 } 674 nextSym = perm_zt[zvec - base_zt[zn]]; 675 } 676 677 final byte ch = seqToUnseq[yy[0]]; 678 unzftab[ch & 0xff] += s + 1; 679 680 while (s-- >= 0) { 681 ll8[++lastShadow] = ch; 682 } 683 684 if (lastShadow >= limitLast) { 685 throw new IOException("block overrun"); 686 } 687 } else { 688 if (++lastShadow >= limitLast) { 689 throw new IOException("block overrun"); 690 } 691 692 final char tmp = yy[nextSym - 1]; 693 unzftab[seqToUnseq[tmp] & 0xff]++; 694 ll8[lastShadow] = seqToUnseq[tmp]; 695 696 /* 697 * This loop is hammered during decompression, hence avoid 698 * native method call overhead of System.arraycopy for very 699 * small ranges to copy. 700 */ 701 if (nextSym <= 16) { 702 for (int j = nextSym - 1; j > 0;) { 703 yy[j] = yy[--j]; 704 } 705 } else { 706 System.arraycopy(yy, 0, yy, 1, nextSym - 1); 707 } 708 709 yy[0] = tmp; 710 711 if (groupPos == 0) { 712 groupPos = G_SIZE - 1; 713 zt = selector[++groupNo] & 0xff; 714 base_zt = base[zt]; 715 limit_zt = limit[zt]; 716 perm_zt = perm[zt]; 717 minLens_zt = minLens[zt]; 718 } else { 719 groupPos--; 720 } 721 722 int zn = minLens_zt; 723 724 // Inlined: 725 // int zvec = bsR(zn); 726 while (bsLiveShadow < zn) { 727 final int thech = inShadow.read(); 728 if (thech >= 0) { 729 bsBuffShadow = (bsBuffShadow << 8) | thech; 730 bsLiveShadow += 8; 731 continue; 732 } else { 733 throw new IOException("unexpected end of stream"); 734 } 735 } 736 int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) 737 & ((1 << zn) - 1); 738 bsLiveShadow -= zn; 739 740 while (zvec > limit_zt[zn]) { 741 zn++; 742 while (bsLiveShadow < 1) { 743 final int thech = inShadow.read(); 744 if (thech >= 0) { 745 bsBuffShadow = (bsBuffShadow << 8) | thech; 746 bsLiveShadow += 8; 747 continue; 748 } else { 749 throw new IOException("unexpected end of stream"); 750 } 751 } 752 bsLiveShadow--; 753 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); 754 } 755 nextSym = perm_zt[zvec - base_zt[zn]]; 756 } 757 } 758 759 this.last = lastShadow; 760 this.bsLive = bsLiveShadow; 761 this.bsBuff = bsBuffShadow; 762 } 763 764 private int getAndMoveToFrontDecode0(final int groupNo) throws IOException { 765 final InputStream inShadow = this.in; 766 final Data dataShadow = this.data; 767 final int zt = dataShadow.selector[groupNo] & 0xff; 768 final int[] limit_zt = dataShadow.limit[zt]; 769 int zn = dataShadow.minLens[zt]; 770 int zvec = bsR(zn); 771 int bsLiveShadow = this.bsLive; 772 int bsBuffShadow = this.bsBuff; 773 774 while (zvec > limit_zt[zn]) { 775 zn++; 776 while (bsLiveShadow < 1) { 777 final int thech = inShadow.read(); 778 779 if (thech >= 0) { 780 bsBuffShadow = (bsBuffShadow << 8) | thech; 781 bsLiveShadow += 8; 782 continue; 783 } else { 784 throw new IOException("unexpected end of stream"); 785 } 786 } 787 bsLiveShadow--; 788 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); 789 } 790 791 this.bsLive = bsLiveShadow; 792 this.bsBuff = bsBuffShadow; 793 794 return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; 795 } 796 797 private int setupBlock() throws IOException { 798 if (currentState == EOF || this.data == null) { 799 return -1; 800 } 801 802 final int[] cftab = this.data.cftab; 803 final int[] tt = this.data.initTT(this.last + 1); 804 final byte[] ll8 = this.data.ll8; 805 cftab[0] = 0; 806 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); 807 808 for (int i = 1, c = cftab[0]; i <= 256; i++) { 809 c += cftab[i]; 810 cftab[i] = c; 811 } 812 813 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { 814 tt[cftab[ll8[i] & 0xff]++] = i; 815 } 816 817 if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { 818 throw new IOException("stream corrupted"); 819 } 820 821 this.su_tPos = tt[this.origPtr]; 822 this.su_count = 0; 823 this.su_i2 = 0; 824 this.su_ch2 = 256; /* not a char and not EOF */ 825 826 if (this.blockRandomised) { 827 this.su_rNToGo = 0; 828 this.su_rTPos = 0; 829 return setupRandPartA(); 830 } 831 return setupNoRandPartA(); 832 } 833 834 private int setupRandPartA() throws IOException { 835 if (this.su_i2 <= this.last) { 836 this.su_chPrev = this.su_ch2; 837 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 838 this.su_tPos = this.data.tt[this.su_tPos]; 839 if (this.su_rNToGo == 0) { 840 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 841 if (++this.su_rTPos == 512) { 842 this.su_rTPos = 0; 843 } 844 } else { 845 this.su_rNToGo--; 846 } 847 this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; 848 this.su_i2++; 849 this.currentState = RAND_PART_B_STATE; 850 this.crc.updateCRC(su_ch2Shadow); 851 return su_ch2Shadow; 852 } else { 853 endBlock(); 854 initBlock(); 855 return setupBlock(); 856 } 857 } 858 859 private int setupNoRandPartA() throws IOException { 860 if (this.su_i2 <= this.last) { 861 this.su_chPrev = this.su_ch2; 862 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 863 this.su_ch2 = su_ch2Shadow; 864 this.su_tPos = this.data.tt[this.su_tPos]; 865 this.su_i2++; 866 this.currentState = NO_RAND_PART_B_STATE; 867 this.crc.updateCRC(su_ch2Shadow); 868 return su_ch2Shadow; 869 } else { 870 this.currentState = NO_RAND_PART_A_STATE; 871 endBlock(); 872 initBlock(); 873 return setupBlock(); 874 } 875 } 876 877 private int setupRandPartB() throws IOException { 878 if (this.su_ch2 != this.su_chPrev) { 879 this.currentState = RAND_PART_A_STATE; 880 this.su_count = 1; 881 return setupRandPartA(); 882 } else if (++this.su_count >= 4) { 883 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 884 this.su_tPos = this.data.tt[this.su_tPos]; 885 if (this.su_rNToGo == 0) { 886 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 887 if (++this.su_rTPos == 512) { 888 this.su_rTPos = 0; 889 } 890 } else { 891 this.su_rNToGo--; 892 } 893 this.su_j2 = 0; 894 this.currentState = RAND_PART_C_STATE; 895 if (this.su_rNToGo == 1) { 896 this.su_z ^= 1; 897 } 898 return setupRandPartC(); 899 } else { 900 this.currentState = RAND_PART_A_STATE; 901 return setupRandPartA(); 902 } 903 } 904 905 private int setupRandPartC() throws IOException { 906 if (this.su_j2 < this.su_z) { 907 this.crc.updateCRC(this.su_ch2); 908 this.su_j2++; 909 return this.su_ch2; 910 } else { 911 this.currentState = RAND_PART_A_STATE; 912 this.su_i2++; 913 this.su_count = 0; 914 return setupRandPartA(); 915 } 916 } 917 918 private int setupNoRandPartB() throws IOException { 919 if (this.su_ch2 != this.su_chPrev) { 920 this.su_count = 1; 921 return setupNoRandPartA(); 922 } else if (++this.su_count >= 4) { 923 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 924 this.su_tPos = this.data.tt[this.su_tPos]; 925 this.su_j2 = 0; 926 return setupNoRandPartC(); 927 } else { 928 return setupNoRandPartA(); 929 } 930 } 931 932 private int setupNoRandPartC() throws IOException { 933 if (this.su_j2 < this.su_z) { 934 int su_ch2Shadow = this.su_ch2; 935 this.crc.updateCRC(su_ch2Shadow); 936 this.su_j2++; 937 this.currentState = NO_RAND_PART_C_STATE; 938 return su_ch2Shadow; 939 } else { 940 this.su_i2++; 941 this.su_count = 0; 942 return setupNoRandPartA(); 943 } 944 } 945 946 private static final class Data { 947 948 // (with blockSize 900k) 949 final boolean[] inUse = new boolean[256]; // 256 byte 950 951 final byte[] seqToUnseq = new byte[256]; // 256 byte 952 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 953 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 954 955 /** 956 * Freq table collected to save a pass over the data during 957 * decompression. 958 */ 959 final int[] unzftab = new int[256]; // 1024 byte 960 961 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 962 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 963 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 964 final int[] minLens = new int[N_GROUPS]; // 24 byte 965 966 final int[] cftab = new int[257]; // 1028 byte 967 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte 968 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 969 // byte 970 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte 971 // --------------- 972 // 60798 byte 973 974 int[] tt; // 3600000 byte 975 byte[] ll8; // 900000 byte 976 977 // --------------- 978 // 4560782 byte 979 // =============== 980 981 Data(int blockSize100k) { 982 this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE]; 983 } 984 985 /** 986 * Initializes the {@link #tt} array. 987 * 988 * This method is called when the required length of the array is known. 989 * I don't initialize it at construction time to avoid unneccessary 990 * memory allocation when compressing small files. 991 */ 992 int[] initTT(int length) { 993 int[] ttShadow = this.tt; 994 995 // tt.length should always be >= length, but theoretically 996 // it can happen, if the compressor mixed small and large 997 // blocks. Normally only the last block will be smaller 998 // than others. 999 if ((ttShadow == null) || (ttShadow.length < length)) { 1000 this.tt = ttShadow = new int[length]; 1001 } 1002 1003 return ttShadow; 1004 } 1005 1006 } 1007 1008 /** 1009 * Checks if the signature matches what is expected for a bzip2 file. 1010 * 1011 * @param signature 1012 * the bytes to check 1013 * @param length 1014 * the number of bytes to check 1015 * @return true, if this stream is a bzip2 compressed stream, false otherwise 1016 * 1017 * @since 1.1 1018 */ 1019 public static boolean matches(byte[] signature, int length) { 1020 1021 if (length < 3) { 1022 return false; 1023 } 1024 1025 if (signature[0] != 'B') { 1026 return false; 1027 } 1028 1029 if (signature[1] != 'Z') { 1030 return false; 1031 } 1032 1033 if (signature[2] != 'h') { 1034 return false; 1035 } 1036 1037 return true; 1038 } 1039}