111 lines
3 KiB
Go
111 lines
3 KiB
Go
|
// Copyright 2015, Joe Tsai. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE.md file.
|
||
|
|
||
|
// Package bzip2 implements the BZip2 compressed data format.
|
||
|
//
|
||
|
// Canonical C implementation:
|
||
|
// http://bzip.org
|
||
|
//
|
||
|
// Unofficial format specification:
|
||
|
// https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
|
||
|
package bzip2
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"hash/crc32"
|
||
|
|
||
|
"github.com/dsnet/compress/internal"
|
||
|
"github.com/dsnet/compress/internal/errors"
|
||
|
)
|
||
|
|
||
|
// There does not exist a formal specification of the BZip2 format. As such,
|
||
|
// much of this work is derived by either reverse engineering the original C
|
||
|
// source code or using secondary sources.
|
||
|
//
|
||
|
// A significant amount of fuzz testing is done to ensure that outputs from
|
||
|
// this package are properly decoded by the C library. Furthermore, we test that
|
||
|
// both this package and the C library agree about what inputs are invalid.
|
||
|
//
|
||
|
// Compression stack:
|
||
|
// Run-length encoding 1 (RLE1)
|
||
|
// Burrows-Wheeler transform (BWT)
|
||
|
// Move-to-front transform (MTF)
|
||
|
// Run-length encoding 2 (RLE2)
|
||
|
// Prefix encoding (PE)
|
||
|
//
|
||
|
// References:
|
||
|
// http://bzip.org/
|
||
|
// https://en.wikipedia.org/wiki/Bzip2
|
||
|
// https://code.google.com/p/jbzip2/
|
||
|
|
||
|
// Named compression level values, listed from lowest to highest.
//
// NOTE(review): presumably these are the levels accepted by the Writer
// configuration; the code that consumes them is not visible in this section.
const (
	// BestSpeed is the lowest named level.
	BestSpeed = 1
	// DefaultCompression is the level between the two extremes.
	DefaultCompression = 6
	// BestCompression is the highest named level.
	BestCompression = 9
)
|
||
|
|
||
|
// Magic numbers used by this package.
//
// NOTE(review): judging by the names, these presumably mark the stream header,
// the start of a block, and the end of the stream — confirm against the
// reader and writer.
const (
	hdrMagic = 0x425a         // Hex of "BZ" (0x42 is 'B', 0x5a is 'Z')
	blkMagic = 0x314159265359 // BCD of PI (3.14159265359)
	endMagic = 0x177245385090 // BCD of sqrt(PI) (1.77245385090)
)

// blockSize is the base unit used for sizing blocks.
//
// NOTE(review): presumably a byte count that is scaled by the compression
// level; verify against the code that uses it.
const blockSize = 100000
|
||
|
|
||
|
func errorf(c int, f string, a ...interface{}) error {
|
||
|
return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)}
|
||
|
}
|
||
|
|
||
|
func panicf(c int, f string, a ...interface{}) {
|
||
|
errors.Panic(errorf(c, f, a...))
|
||
|
}
|
||
|
|
||
|
// errWrap converts a lower-level errors.Error to be one from this package.
|
||
|
// The replaceCode passed in will be used to replace the code for any errors
|
||
|
// with the errors.Invalid code.
|
||
|
//
|
||
|
// For the Reader, set this to errors.Corrupted.
|
||
|
// For the Writer, set this to errors.Internal.
|
||
|
func errWrap(err error, replaceCode int) error {
|
||
|
if cerr, ok := err.(errors.Error); ok {
|
||
|
if errors.IsInvalid(cerr) {
|
||
|
cerr.Code = replaceCode
|
||
|
}
|
||
|
err = errorf(cerr.Code, "%s", cerr.Msg)
|
||
|
}
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// errClosed is a package-level error carrying the errors.Closed code and an
// empty message.
// NOTE(review): presumably returned when a closed Reader or Writer is used;
// confirm against the call sites.
var errClosed = errorf(errors.Closed, "")
|
||
|
|
||
|
// crc holds the state for computing the CRC-32 variant used by BZip2.
//
// BZip2's CRC-32 consumes the bits of each byte in big-endian order: the MSB
// is read before the LSB. The standard library's CRC-32 IEEE implementation
// can therefore be reused, provided the bits are swapped around it.
type crc struct {
	// val is the running checksum value.
	val uint32
	// buf is scratch space that holds the bit-swapped copy of each chunk of
	// input bytes.
	buf [256]byte
}
|
||
|
|
||
|
// update computes the CRC-32 of appending buf to c.
|
||
|
func (c *crc) update(buf []byte) {
|
||
|
cval := internal.ReverseUint32(c.val)
|
||
|
for len(buf) > 0 {
|
||
|
n := len(buf)
|
||
|
if n > len(c.buf) {
|
||
|
n = len(c.buf)
|
||
|
}
|
||
|
for i, b := range buf[:n] {
|
||
|
c.buf[i] = internal.ReverseLUT[b]
|
||
|
}
|
||
|
cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n])
|
||
|
buf = buf[n:]
|
||
|
}
|
||
|
c.val = internal.ReverseUint32(cval)
|
||
|
}
|