diff --git a/go.mod b/go.mod index 3fc5f5e..e647b3d 100644 --- a/go.mod +++ b/go.mod @@ -3,25 +3,25 @@ module github.com/DroidsOnRoids/bitrise-step-flutter go 1.17 require ( - github.com/bitrise-io/go-utils v0.0.0-20200224122728-e212188d99b4 + github.com/bitrise-io/go-utils v1.0.2 github.com/bitrise-tools/go-steputils v0.0.0-20200227150459-94490ca44ddb github.com/blang/semver v3.5.1+incompatible - github.com/magiconair/properties v1.8.2-0.20200610093828-cfba716d4c41 + github.com/magiconair/properties v1.8.6 github.com/mholt/archiver v1.1.3-0.20190917152942-233fc16b9615 - github.com/stretchr/testify v1.6.1 + github.com/stretchr/testify v1.7.0 ) require ( - github.com/andybalholm/brotli v1.0.0 // indirect + github.com/andybalholm/brotli v1.0.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dsnet/compress v0.0.1 // indirect - github.com/golang/snappy v0.0.1 // indirect - github.com/klauspost/compress v1.10.10 // indirect - github.com/klauspost/pgzip v1.2.4 // indirect - github.com/nwaples/rardecode v1.1.0 // indirect - github.com/pierrec/lz4 v2.5.2+incompatible // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/klauspost/compress v1.15.4 // indirect + github.com/klauspost/pgzip v1.2.5 // indirect + github.com/nwaples/rardecode v1.1.3 // indirect + github.com/pierrec/lz4 v2.6.1+incompatible // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/ulikunitz/xz v0.5.7 // indirect + github.com/ulikunitz/xz v0.5.10 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect - gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 // indirect + gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect ) diff --git a/go.sum b/go.sum index e200a9e..c987c40 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,12 @@ github.com/andybalholm/brotli v0.0.0-20190621154722-5f990b63d2d6/go.mod h1:+lx6/Aqd1kLJ1GQfkvOnaZ1WGmLpMpbprPuIOOZX30U= github.com/andybalholm/brotli v1.0.0 h1:7UCwP93aiSfvWpapti8g88vVVGp2qqtGyePsSuDafo4= github.com/andybalholm/brotli v1.0.0/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= +github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= +github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/bitrise-io/go-utils v0.0.0-20200224122728-e212188d99b4 h1:35ImX3SrDgRYVDPue7NhybnQ3quuVrTQzjjul+R3aUI= github.com/bitrise-io/go-utils v0.0.0-20200224122728-e212188d99b4/go.mod h1:tTEsKvbz1LbzuN/KpVFHXnLtcAPdEgIdM41s0lL407s= +github.com/bitrise-io/go-utils v1.0.2 h1:w4Mz2IvrgDzrFJECuHdvsK1LHO30cdtuy9bBa7Lw2c0= +github.com/bitrise-io/go-utils v1.0.2/go.mod h1:ZY1DI+fEpZuFpO9szgDeICM4QbqoWVt0RSY3tRI1heY= github.com/bitrise-tools/go-steputils v0.0.0-20200227150459-94490ca44ddb h1:5zeXRDfZntzK+A2MNdNoU1uuueOukoC/Lh3N3dZICSI= github.com/bitrise-tools/go-steputils v0.0.0-20200227150459-94490ca44ddb/go.mod h1:4eEx1Bd2AAVAN/YGK4V066jO7ekrsLymkyQZzx2UEwQ= github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= @@ -16,49 +20,82 @@ github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdf github.com/golang/gddo v0.0.0-20190419222130-af0f2af80721/go.mod h1:xEhNfoBDX1hzLm2Nf80qUvZ2sVwoMZ8d6IE2SrsQfh4= github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-retryablehttp v0.7.0/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.7.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.10.10 h1:a/y8CglcM7gLGYmlbP/stPE5sR3hbhFRUjCBfd/0B3I= github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.15.4 h1:1kn4/7MepF/CHmYub99/nNX8az0IJjfSOU/jbnTVfqQ= +github.com/klauspost/compress v1.15.4/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.1/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/klauspost/pgzip v1.2.4 h1:TQ7CNpYKovDOmqzRHKxJh0BeaBI7UdQZYc6p7pMQh1A= github.com/klauspost/pgzip v1.2.4/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= +github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/magiconair/properties v1.8.2-0.20200610093828-cfba716d4c41 h1:FZWX5EyBobAddDAP0XahKO1LrWKGB5AaNpVO4kdyXdA= github.com/magiconair/properties v1.8.2-0.20200610093828-cfba716d4c41/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= +github.com/magiconair/properties v1.8.6 h1:5ibWZ6iY0NctNGWo87LalDlEZ6R41TqbbDamhfG/Qzo= +github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/mholt/archiver v1.1.3-0.20190917152942-233fc16b9615 h1:2C0a9oPTqjpvyg9UCGP6Xan2fR4RUb/LfrNaLAcKT50= github.com/mholt/archiver v1.1.3-0.20190917152942-233fc16b9615/go.mod h1:15A2vUgLkyQMX8s3CbMj8aYNqkegWVTi4xXE46UGHNY= github.com/nwaples/rardecode v1.0.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= +github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= +github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pierrec/lz4 v2.6.1+incompatible h1:9UY3+iC23yxF0UfGaYrGplQ+79Rg+h/q9FV9ix19jjM= +github.com/pierrec/lz4 v2.6.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.3.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= github.com/ulikunitz/xz v0.5.7 h1:YvTNdFzX6+W5m9msiYg/zpkSURPPtOlzbqYjrFn7Yt4= github.com/ulikunitz/xz v0.5.7/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8= +github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20211202192323-5770296d904e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200219091948-cb0a6d8edb6c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200220224806-8a925fa4c0df/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= @@ -66,6 +103,9 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index 93537dd..48843a5 100644 --- a/main.go +++ b/main.go @@ -84,7 +84,7 @@ func downloadAndExtractReleaseSdk(flutterVersion, flutterSdkDestinationDir strin channel := versionComponents[len(versionComponents)-1] flutterSdkSourceURL := fmt.Sprintf( - "https://storage.googleapis.com/flutter_infra/releases/%s/%s/flutter_%s_v%s.%s", + "https://storage.googleapis.com/flutter_infra_release/releases/%s/%s/flutter_%s_v%s.%s", channel, getFlutterPlatform(), getFlutterPlatform(), diff --git a/vendor/github.com/andybalholm/brotli/README.md b/vendor/github.com/andybalholm/brotli/README.md index 02d115e..1ea7fdb 100644 --- a/vendor/github.com/andybalholm/brotli/README.md +++ b/vendor/github.com/andybalholm/brotli/README.md @@ -3,3 +3,5 @@ It was translated from the reference implementation (https://github.com/google/b with the `c2go` tool at https://github.com/andybalholm/c2go. I am using it in production with https://github.com/andybalholm/redwood. + +API documentation is found at https://pkg.go.dev/github.com/andybalholm/brotli?tab=doc. diff --git a/vendor/github.com/andybalholm/brotli/backward_references.go b/vendor/github.com/andybalholm/brotli/backward_references.go index 1642e47..008c054 100644 --- a/vendor/github.com/andybalholm/brotli/backward_references.go +++ b/vendor/github.com/andybalholm/brotli/backward_references.go @@ -1,5 +1,9 @@ package brotli +import ( + "sync" +) + /* Copyright 2013 Google Inc. All Rights Reserved. Distributed under MIT license. @@ -31,13 +35,10 @@ func computeDistanceCode(distance uint, max_distance uint, dist_cache []int) uin return distance + numDistanceShortCodes - 1 } -/* "commands" points to the next output command to write to, "*num_commands" is - initially the total amount of commands output by previous - CreateBackwardReferences calls, and must be incremented by the amount written - by this call. */ -func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands []command, num_commands *uint, num_literals *uint) { +var hasherSearchResultPool sync.Pool + +func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) { var max_backward_limit uint = maxBackwardLimit(params.lgwin) - var orig_commands []command = commands var insert_length uint = *last_insert_len var pos_end uint = position + num_bytes var store_end uint @@ -57,8 +58,14 @@ func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, /* Minimum score to accept a backward reference. */ hasher.PrepareDistanceCache(dist_cache) - var sr2 hasherSearchResult - var sr hasherSearchResult + sr2, _ := hasherSearchResultPool.Get().(*hasherSearchResult) + if sr2 == nil { + sr2 = &hasherSearchResult{} + } + sr, _ := hasherSearchResultPool.Get().(*hasherSearchResult) + if sr == nil { + sr = &hasherSearchResult{} + } for position+hasher.HashTypeLength() < pos_end { var max_length uint = pos_end - position @@ -67,7 +74,7 @@ func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, sr.len_code_delta = 0 sr.distance = 0 sr.score = kMinScore - hasher.FindLongestMatch(¶ms.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position, max_length, max_distance, gap, params.dist.max_distance, &sr) + hasher.FindLongestMatch(¶ms.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position, max_length, max_distance, gap, params.dist.max_distance, sr) if sr.score > kMinScore { /* Found a match. Let's look for something even better ahead. */ var delayed_backward_references_in_row int = 0 @@ -83,14 +90,14 @@ func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, sr2.distance = 0 sr2.score = kMinScore max_distance = brotli_min_size_t(position+1, max_backward_limit) - hasher.FindLongestMatch(¶ms.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position+1, max_length, max_distance, gap, params.dist.max_distance, &sr2) + hasher.FindLongestMatch(¶ms.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position+1, max_length, max_distance, gap, params.dist.max_distance, sr2) if sr2.score >= sr.score+cost_diff_lazy { /* Ok, let's just write one byte for now and start a match from the next byte. */ position++ insert_length++ - sr = sr2 + *sr = *sr2 delayed_backward_references_in_row++ if delayed_backward_references_in_row < 4 && position+hasher.HashTypeLength() < pos_end { continue @@ -114,8 +121,7 @@ func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, hasher.PrepareDistanceCache(dist_cache) } - initCommand(&commands[0], ¶ms.dist, insert_length, sr.len, sr.len_code_delta, distance_code) - commands = commands[1:] + *commands = append(*commands, makeCommand(¶ms.dist, insert_length, sr.len, sr.len_code_delta, distance_code)) } *num_literals += insert_length @@ -173,5 +179,7 @@ func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, insert_length += pos_end - position *last_insert_len = insert_length - *num_commands += uint(-cap(commands) + cap(orig_commands)) + + hasherSearchResultPool.Put(sr) + hasherSearchResultPool.Put(sr2) } diff --git a/vendor/github.com/andybalholm/brotli/backward_references_hq.go b/vendor/github.com/andybalholm/brotli/backward_references_hq.go index 5eac736..21629c1 100644 --- a/vendor/github.com/andybalholm/brotli/backward_references_hq.go +++ b/vendor/github.com/andybalholm/brotli/backward_references_hq.go @@ -123,14 +123,13 @@ func setCost(histogram []uint32, histogram_size uint, literal_histogram bool, co } } -func zopfliCostModelSetFromCommands(self *zopfliCostModel, position uint, ringbuffer []byte, ringbuffer_mask uint, commands []command, num_commands uint, last_insert_len uint) { +func zopfliCostModelSetFromCommands(self *zopfliCostModel, position uint, ringbuffer []byte, ringbuffer_mask uint, commands []command, last_insert_len uint) { var histogram_literal [numLiteralSymbols]uint32 var histogram_cmd [numCommandSymbols]uint32 var histogram_dist [maxEffectiveDistanceAlphabetSize]uint32 var cost_literal [numLiteralSymbols]float32 var pos uint = position - last_insert_len var min_cost_cmd float32 = kInfinity - var i uint var cost_cmd []float32 = self.cost_cmd_[:] var literal_costs []float32 @@ -138,7 +137,7 @@ func zopfliCostModelSetFromCommands(self *zopfliCostModel, position uint, ringbu histogram_cmd = [numCommandSymbols]uint32{} histogram_dist = [maxEffectiveDistanceAlphabetSize]uint32{} - for i = 0; i < num_commands; i++ { + for i := range commands { var inslength uint = uint(commands[i].insert_len_) var copylength uint = uint(commandCopyLen(&commands[i])) var distcode uint = uint(commands[i].dist_prefix_) & 0x3FF @@ -161,7 +160,7 @@ func zopfliCostModelSetFromCommands(self *zopfliCostModel, position uint, ringbu setCost(histogram_cmd[:], numCommandSymbols, false, cost_cmd) setCost(histogram_dist[:], uint(self.distance_histogram_size), false, self.cost_dist_) - for i = 0; i < numCommandSymbols; i++ { + for i := 0; i < numCommandSymbols; i++ { min_cost_cmd = brotli_min_float(min_cost_cmd, cost_cmd[i]) } @@ -169,10 +168,10 @@ func zopfliCostModelSetFromCommands(self *zopfliCostModel, position uint, ringbu { literal_costs = self.literal_costs_ var literal_carry float32 = 0.0 - var num_bytes uint = self.num_bytes_ + num_bytes := int(self.num_bytes_) literal_costs[0] = 0.0 - for i = 0; i < num_bytes; i++ { - literal_carry += cost_literal[ringbuffer[(position+i)&ringbuffer_mask]] + for i := 0; i < num_bytes; i++ { + literal_carry += cost_literal[ringbuffer[(position+uint(i))&ringbuffer_mask]] literal_costs[i+1] = literal_costs[i] + literal_carry literal_carry -= literal_costs[i+1] - literal_costs[i] } @@ -502,7 +501,9 @@ func updateNodes(num_bytes uint, block_start uint, pos uint, ringbuffer []byte, var cost float32 = dist_cost + float32(getCopyExtra(copycode)) + zopfliCostModelGetCommandCost(model, cmdcode) if cost < nodes[pos+len].u.cost { updateZopfliNode(nodes, pos, start, uint(len), len_code, dist, 0, cost) - result = brotli_max_size_t(result, uint(len)) + if len > result { + result = len + } } } } @@ -530,7 +531,7 @@ func computeShortestPathFromNodes(num_bytes uint, nodes []zopfliNode) uint { } /* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */ -func zopfliCreateCommands(num_bytes uint, block_start uint, nodes []zopfliNode, dist_cache []int, last_insert_len *uint, params *encoderParams, commands []command, num_literals *uint) { +func zopfliCreateCommands(num_bytes uint, block_start uint, nodes []zopfliNode, dist_cache []int, last_insert_len *uint, params *encoderParams, commands *[]command, num_literals *uint) { var max_backward_limit uint = maxBackwardLimit(params.lgwin) var pos uint = 0 var offset uint32 = nodes[0].u.next @@ -552,7 +553,7 @@ func zopfliCreateCommands(num_bytes uint, block_start uint, nodes []zopfliNode, var max_distance uint = brotli_min_size_t(block_start+pos, max_backward_limit) var is_dictionary bool = (distance > max_distance+gap) var dist_code uint = uint(zopfliNodeDistanceCode(next)) - initCommand(&commands[i], ¶ms.dist, insert_length, copy_length, int(len_code)-int(copy_length), dist_code) + *commands = append(*commands, makeCommand(¶ms.dist, insert_length, copy_length, int(len_code)-int(copy_length), dist_code)) if !is_dictionary && dist_code > 0 { dist_cache[3] = dist_cache[2] @@ -679,16 +680,16 @@ func zopfliComputeShortestPath(num_bytes uint, position uint, ringbuffer []byte, return computeShortestPathFromNodes(num_bytes, nodes) } -func createZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher *h10, dist_cache []int, last_insert_len *uint, commands []command, num_commands *uint, num_literals *uint) { +func createZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher *h10, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) { var nodes []zopfliNode nodes = make([]zopfliNode, (num_bytes + 1)) initZopfliNodes(nodes, num_bytes+1) - *num_commands += zopfliComputeShortestPath(num_bytes, position, ringbuffer, ringbuffer_mask, params, dist_cache, hasher, nodes) + zopfliComputeShortestPath(num_bytes, position, ringbuffer, ringbuffer_mask, params, dist_cache, hasher, nodes) zopfliCreateCommands(num_bytes, position, nodes, dist_cache, last_insert_len, params, commands, num_literals) nodes = nil } -func createHqZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands []command, num_commands *uint, num_literals *uint) { +func createHqZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) { var max_backward_limit uint = maxBackwardLimit(params.lgwin) var num_matches []uint32 = make([]uint32, num_bytes) var matches_size uint = 4 * num_bytes @@ -703,7 +704,7 @@ func createHqZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer var orig_num_literals uint var orig_last_insert_len uint var orig_dist_cache [4]int - var orig_num_commands uint + var orig_num_commands int var model zopfliCostModel var nodes []zopfliNode var matches []backwardMatch = make([]backwardMatch, matches_size) @@ -769,7 +770,7 @@ func createHqZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer orig_num_literals = *num_literals orig_last_insert_len = *last_insert_len copy(orig_dist_cache[:], dist_cache[:4]) - orig_num_commands = *num_commands + orig_num_commands = len(*commands) nodes = make([]zopfliNode, (num_bytes + 1)) initZopfliCostModel(&model, ¶ms.dist, num_bytes) for i = 0; i < 2; i++ { @@ -777,14 +778,14 @@ func createHqZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer if i == 0 { zopfliCostModelSetFromLiteralCosts(&model, position, ringbuffer, ringbuffer_mask) } else { - zopfliCostModelSetFromCommands(&model, position, ringbuffer, ringbuffer_mask, commands, *num_commands-orig_num_commands, orig_last_insert_len) + zopfliCostModelSetFromCommands(&model, position, ringbuffer, ringbuffer_mask, (*commands)[orig_num_commands:], orig_last_insert_len) } - *num_commands = orig_num_commands + *commands = (*commands)[:orig_num_commands] *num_literals = orig_num_literals *last_insert_len = orig_last_insert_len copy(dist_cache, orig_dist_cache[:4]) - *num_commands += zopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches, nodes) + zopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches, nodes) zopfliCreateCommands(num_bytes, position, nodes, dist_cache, last_insert_len, params, commands, num_literals) } diff --git a/vendor/github.com/andybalholm/brotli/block_splitter.go b/vendor/github.com/andybalholm/brotli/block_splitter.go index 2ccff45..978a131 100644 --- a/vendor/github.com/andybalholm/brotli/block_splitter.go +++ b/vendor/github.com/andybalholm/brotli/block_splitter.go @@ -33,23 +33,21 @@ const ( kMinItersForRefining uint = 100 ) -func countLiterals(cmds []command, num_commands uint) uint { +func countLiterals(cmds []command) uint { var total_length uint = 0 /* Count how many we have. */ - var i uint - for i = 0; i < num_commands; i++ { + for i := range cmds { total_length += uint(cmds[i].insert_len_) } return total_length } -func copyLiteralsToByteArray(cmds []command, num_commands uint, data []byte, offset uint, mask uint, literals []byte) { +func copyLiteralsToByteArray(cmds []command, data []byte, offset uint, mask uint, literals []byte) { var pos uint = 0 var from_pos uint = offset & mask - var i uint - for i = 0; i < num_commands; i++ { + for i := range cmds { var insert_len uint = uint(cmds[i].insert_len_) if from_pos+insert_len > mask { var head_size uint = mask + 1 - from_pos @@ -90,24 +88,19 @@ const clustersPerBatch = 16 func initBlockSplit(self *blockSplit) { self.num_types = 0 self.num_blocks = 0 - self.types = nil - self.lengths = nil + self.types = self.types[:0] + self.lengths = self.lengths[:0] self.types_alloc_size = 0 self.lengths_alloc_size = 0 } -func destroyBlockSplit(self *blockSplit) { - self.types = nil - self.lengths = nil -} - -func splitBlock(cmds []command, num_commands uint, data []byte, pos uint, mask uint, params *encoderParams, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit) { +func splitBlock(cmds []command, data []byte, pos uint, mask uint, params *encoderParams, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit) { { - var literals_count uint = countLiterals(cmds, num_commands) + var literals_count uint = countLiterals(cmds) var literals []byte = make([]byte, literals_count) /* Create a continuous array of literals. */ - copyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals) + copyLiteralsToByteArray(cmds, data, pos, mask, literals) /* Create the block split on the array of literals. Literal histograms have alphabet size 256. */ @@ -116,28 +109,26 @@ func splitBlock(cmds []command, num_commands uint, data []byte, pos uint, mask u literals = nil } { - var insert_and_copy_codes []uint16 = make([]uint16, num_commands) + var insert_and_copy_codes []uint16 = make([]uint16, len(cmds)) /* Compute prefix codes for commands. */ - var i uint - for i = 0; i < num_commands; i++ { + for i := range cmds { insert_and_copy_codes[i] = cmds[i].cmd_prefix_ } /* Create the block split on the array of command prefixes. */ - splitByteVectorCommand(insert_and_copy_codes, num_commands, kSymbolsPerCommandHistogram, kMaxCommandHistograms, kCommandStrideLength, kCommandBlockSwitchCost, params, insert_and_copy_split) + splitByteVectorCommand(insert_and_copy_codes, kSymbolsPerCommandHistogram, kMaxCommandHistograms, kCommandStrideLength, kCommandBlockSwitchCost, params, insert_and_copy_split) /* TODO: reuse for distances? */ insert_and_copy_codes = nil } { - var distance_prefixes []uint16 = make([]uint16, num_commands) + var distance_prefixes []uint16 = make([]uint16, len(cmds)) var j uint = 0 /* Create a continuous array of distance prefixes. */ - var i uint - for i = 0; i < num_commands; i++ { + for i := range cmds { var cmd *command = &cmds[i] if commandCopyLen(cmd) != 0 && cmd.cmd_prefix_ >= 128 { distance_prefixes[j] = cmd.dist_prefix_ & 0x3FF diff --git a/vendor/github.com/andybalholm/brotli/block_splitter_command.go b/vendor/github.com/andybalholm/brotli/block_splitter_command.go index e505fe1..9dec13e 100644 --- a/vendor/github.com/andybalholm/brotli/block_splitter_command.go +++ b/vendor/github.com/andybalholm/brotli/block_splitter_command.go @@ -372,7 +372,8 @@ func clusterBlocksCommand(data []uint16, length uint, num_blocks uint, block_ids histogram_symbols = nil } -func splitByteVectorCommand(data []uint16, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) { +func splitByteVectorCommand(data []uint16, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) { + length := uint(len(data)) var data_size uint = histogramDataSizeCommand() var num_histograms uint = length/literals_per_histogram + 1 var histograms []histogramCommand diff --git a/vendor/github.com/andybalholm/brotli/brotli_bit_stream.go b/vendor/github.com/andybalholm/brotli/brotli_bit_stream.go index 395f604..7acfb18 100644 --- a/vendor/github.com/andybalholm/brotli/brotli_bit_stream.go +++ b/vendor/github.com/andybalholm/brotli/brotli_bit_stream.go @@ -1,6 +1,9 @@ package brotli -import "math" +import ( + "math" + "sync" +) const maxHuffmanTreeSize = (2*numCommandSymbols + 1) @@ -411,10 +414,12 @@ func buildAndStoreHuffmanTree(histogram []uint32, histogram_length uint, alphabe } } -func sortHuffmanTree1(v0 *huffmanTree, v1 *huffmanTree) bool { +func sortHuffmanTree1(v0 huffmanTree, v1 huffmanTree) bool { return v0.total_count_ < v1.total_count_ } +var huffmanTreePool sync.Pool + func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_bits uint, depth []byte, bits []uint16, storage_ix *uint, storage []byte) { var count uint = 0 var symbols = [4]uint{0} @@ -446,7 +451,13 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ } { var max_tree_size uint = 2*length + 1 - var tree []huffmanTree = make([]huffmanTree, max_tree_size) + tree, _ := huffmanTreePool.Get().(*[]huffmanTree) + if tree == nil || cap(*tree) < int(max_tree_size) { + tmp := make([]huffmanTree, max_tree_size) + tree = &tmp + } else { + *tree = (*tree)[:max_tree_size] + } var count_limit uint32 for count_limit = 1; ; count_limit *= 2 { var node int = 0 @@ -455,9 +466,9 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ l-- if histogram[l] != 0 { if histogram[l] >= count_limit { - initHuffmanTree(&tree[node:][0], histogram[l], -1, int16(l)) + initHuffmanTree(&(*tree)[node:][0], histogram[l], -1, int16(l)) } else { - initHuffmanTree(&tree[node:][0], count_limit, -1, int16(l)) + initHuffmanTree(&(*tree)[node:][0], count_limit, -1, int16(l)) } node++ @@ -471,7 +482,7 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ var j int = n + 1 var k int - sortHuffmanTreeItems(tree, uint(n), huffmanTreeComparator(sortHuffmanTree1)) + sortHuffmanTreeItems(*tree, uint(n), huffmanTreeComparator(sortHuffmanTree1)) /* The nodes are: [0, n): the sorted leaf nodes that we start with. @@ -482,15 +493,15 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ There will be (2n+1) elements at the end. */ initHuffmanTree(&sentinel, math.MaxUint32, -1, -1) - tree[node] = sentinel + (*tree)[node] = sentinel node++ - tree[node] = sentinel + (*tree)[node] = sentinel node++ for k = n - 1; k > 0; k-- { var left int var right int - if tree[i].total_count_ <= tree[j].total_count_ { + if (*tree)[i].total_count_ <= (*tree)[j].total_count_ { left = i i++ } else { @@ -498,7 +509,7 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ j++ } - if tree[i].total_count_ <= tree[j].total_count_ { + if (*tree)[i].total_count_ <= (*tree)[j].total_count_ { right = i i++ } else { @@ -507,17 +518,17 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ } /* The sentinel node becomes the parent node. */ - tree[node-1].total_count_ = tree[left].total_count_ + tree[right].total_count_ + (*tree)[node-1].total_count_ = (*tree)[left].total_count_ + (*tree)[right].total_count_ - tree[node-1].index_left_ = int16(left) - tree[node-1].index_right_or_value_ = int16(right) + (*tree)[node-1].index_left_ = int16(left) + (*tree)[node-1].index_right_or_value_ = int16(right) /* Add back the last sentinel node. */ - tree[node] = sentinel + (*tree)[node] = sentinel node++ } - if setDepth(2*n-1, tree, depth, 14) { + if setDepth(2*n-1, *tree, depth, 14) { /* We need to pack the Huffman tree in 14 bits. If this was not successful, add fake entities to the lowest values and retry. */ break @@ -525,7 +536,7 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ } } - tree = nil + huffmanTreePool.Put(tree) } convertBitDepthsToSymbols(depth, length, bits) @@ -875,27 +886,37 @@ type blockEncoder struct { bits_ []uint16 } -func initBlockEncoder(self *blockEncoder, histogram_length uint, num_block_types uint, block_types []byte, block_lengths []uint32, num_blocks uint) { +var blockEncoderPool sync.Pool + +func getBlockEncoder(histogram_length uint, num_block_types uint, block_types []byte, block_lengths []uint32, num_blocks uint) *blockEncoder { + self, _ := blockEncoderPool.Get().(*blockEncoder) + + if self != nil { + self.block_ix_ = 0 + self.entropy_ix_ = 0 + self.depths_ = self.depths_[:0] + self.bits_ = self.bits_[:0] + } else { + self = &blockEncoder{} + } + self.histogram_length_ = histogram_length self.num_block_types_ = num_block_types self.block_types_ = block_types self.block_lengths_ = block_lengths self.num_blocks_ = num_blocks initBlockTypeCodeCalculator(&self.block_split_code_.type_code_calculator) - self.block_ix_ = 0 if num_blocks == 0 { self.block_len_ = 0 } else { self.block_len_ = uint(block_lengths[0]) } - self.entropy_ix_ = 0 - self.depths_ = nil - self.bits_ = nil + + return self } func cleanupBlockEncoder(self *blockEncoder) { - self.depths_ = nil - self.bits_ = nil + blockEncoderPool.Put(self) } /* Creates entropy codes of block lengths and block types and stores them @@ -948,8 +969,16 @@ func storeSymbolWithContext(self *blockEncoder, symbol uint, context uint, conte func buildAndStoreEntropyCodesLiteral(self *blockEncoder, histograms []histogramLiteral, histograms_size uint, alphabet_size uint, tree []huffmanTree, storage_ix *uint, storage []byte) { var table_size uint = histograms_size * self.histogram_length_ - self.depths_ = make([]byte, table_size) - self.bits_ = make([]uint16, table_size) + if cap(self.depths_) < int(table_size) { + self.depths_ = make([]byte, table_size) + } else { + self.depths_ = self.depths_[:table_size] + } + if cap(self.bits_) < int(table_size) { + self.bits_ = make([]uint16, table_size) + } else { + self.bits_ = self.bits_[:table_size] + } { var i uint for i = 0; i < histograms_size; i++ { @@ -961,8 +990,16 @@ func buildAndStoreEntropyCodesLiteral(self *blockEncoder, histograms []histogram func buildAndStoreEntropyCodesCommand(self *blockEncoder, histograms []histogramCommand, histograms_size uint, alphabet_size uint, tree []huffmanTree, storage_ix *uint, storage []byte) { var table_size uint = histograms_size * self.histogram_length_ - self.depths_ = make([]byte, table_size) - self.bits_ = make([]uint16, table_size) + if cap(self.depths_) < int(table_size) { + self.depths_ = make([]byte, table_size) + } else { + self.depths_ = self.depths_[:table_size] + } + if cap(self.bits_) < int(table_size) { + self.bits_ = make([]uint16, table_size) + } else { + self.bits_ = self.bits_[:table_size] + } { var i uint for i = 0; i < histograms_size; i++ { @@ -974,8 +1011,16 @@ func buildAndStoreEntropyCodesCommand(self *blockEncoder, histograms []histogram func buildAndStoreEntropyCodesDistance(self *blockEncoder, histograms []histogramDistance, histograms_size uint, alphabet_size uint, tree []huffmanTree, storage_ix *uint, storage []byte) { var table_size uint = histograms_size * self.histogram_length_ - self.depths_ = make([]byte, table_size) - self.bits_ = make([]uint16, table_size) + if cap(self.depths_) < int(table_size) { + self.depths_ = make([]byte, table_size) + } else { + self.depths_ = self.depths_[:table_size] + } + if cap(self.bits_) < int(table_size) { + self.bits_ = make([]uint16, table_size) + } else { + self.bits_ = self.bits_[:table_size] + } { var i uint for i = 0; i < histograms_size; i++ { @@ -990,16 +1035,13 @@ func jumpToByteBoundary(storage_ix *uint, storage []byte) { storage[*storage_ix>>3] = 0 } -func storeMetaBlock(input []byte, start_pos uint, length uint, mask uint, prev_byte byte, prev_byte2 byte, is_last bool, params *encoderParams, literal_context_mode int, commands []command, n_commands uint, mb *metaBlockSplit, storage_ix *uint, storage []byte) { +func storeMetaBlock(input []byte, start_pos uint, length uint, mask uint, prev_byte byte, prev_byte2 byte, is_last bool, params *encoderParams, literal_context_mode int, commands []command, mb *metaBlockSplit, storage_ix *uint, storage []byte) { var pos uint = start_pos var i uint var num_distance_symbols uint32 = params.dist.alphabet_size var num_effective_distance_symbols uint32 = num_distance_symbols var tree []huffmanTree var literal_context_lut contextLUT = getContextLUT(literal_context_mode) - var literal_enc blockEncoder - var command_enc blockEncoder - var distance_enc blockEncoder var dist *distanceParams = ¶ms.dist if params.large_window && num_effective_distance_symbols > numHistogramDistanceSymbols { num_effective_distance_symbols = numHistogramDistanceSymbols @@ -1008,13 +1050,13 @@ func storeMetaBlock(input []byte, start_pos uint, length uint, mask uint, prev_b storeCompressedMetaBlockHeader(is_last, length, storage_ix, storage) tree = make([]huffmanTree, maxHuffmanTreeSize) - initBlockEncoder(&literal_enc, numLiteralSymbols, mb.literal_split.num_types, mb.literal_split.types, mb.literal_split.lengths, mb.literal_split.num_blocks) - initBlockEncoder(&command_enc, numCommandSymbols, mb.command_split.num_types, mb.command_split.types, mb.command_split.lengths, mb.command_split.num_blocks) - initBlockEncoder(&distance_enc, uint(num_effective_distance_symbols), mb.distance_split.num_types, mb.distance_split.types, mb.distance_split.lengths, mb.distance_split.num_blocks) + literal_enc := getBlockEncoder(numLiteralSymbols, mb.literal_split.num_types, mb.literal_split.types, mb.literal_split.lengths, mb.literal_split.num_blocks) + command_enc := getBlockEncoder(numCommandSymbols, mb.command_split.num_types, mb.command_split.types, mb.command_split.lengths, mb.command_split.num_blocks) + distance_enc := getBlockEncoder(uint(num_effective_distance_symbols), mb.distance_split.num_types, mb.distance_split.types, mb.distance_split.lengths, mb.distance_split.num_blocks) - buildAndStoreBlockSwitchEntropyCodes(&literal_enc, tree, storage_ix, storage) - buildAndStoreBlockSwitchEntropyCodes(&command_enc, tree, storage_ix, storage) - buildAndStoreBlockSwitchEntropyCodes(&distance_enc, tree, storage_ix, storage) + buildAndStoreBlockSwitchEntropyCodes(literal_enc, tree, storage_ix, storage) + buildAndStoreBlockSwitchEntropyCodes(command_enc, tree, storage_ix, storage) + buildAndStoreBlockSwitchEntropyCodes(distance_enc, tree, storage_ix, storage) writeBits(2, uint64(dist.distance_postfix_bits), storage_ix, storage) writeBits(4, uint64(dist.num_direct_distance_codes)>>dist.distance_postfix_bits, storage_ix, storage) @@ -1034,20 +1076,19 @@ func storeMetaBlock(input []byte, start_pos uint, length uint, mask uint, prev_b encodeContextMap(mb.distance_context_map, mb.distance_context_map_size, mb.distance_histograms_size, tree, storage_ix, storage) } - buildAndStoreEntropyCodesLiteral(&literal_enc, mb.literal_histograms, mb.literal_histograms_size, numLiteralSymbols, tree, storage_ix, storage) - buildAndStoreEntropyCodesCommand(&command_enc, mb.command_histograms, mb.command_histograms_size, numCommandSymbols, tree, storage_ix, storage) - buildAndStoreEntropyCodesDistance(&distance_enc, mb.distance_histograms, mb.distance_histograms_size, uint(num_distance_symbols), tree, storage_ix, storage) + buildAndStoreEntropyCodesLiteral(literal_enc, mb.literal_histograms, mb.literal_histograms_size, numLiteralSymbols, tree, storage_ix, storage) + buildAndStoreEntropyCodesCommand(command_enc, mb.command_histograms, mb.command_histograms_size, numCommandSymbols, tree, storage_ix, storage) + buildAndStoreEntropyCodesDistance(distance_enc, mb.distance_histograms, mb.distance_histograms_size, uint(num_distance_symbols), tree, storage_ix, storage) tree = nil - for i = 0; i < n_commands; i++ { - var cmd command = commands[i] + for _, cmd := range commands { var cmd_code uint = uint(cmd.cmd_prefix_) - storeSymbol(&command_enc, cmd_code, storage_ix, storage) + storeSymbol(command_enc, cmd_code, storage_ix, storage) storeCommandExtra(&cmd, storage_ix, storage) if mb.literal_context_map_size == 0 { var j uint for j = uint(cmd.insert_len_); j != 0; j-- { - storeSymbol(&literal_enc, uint(input[pos&mask]), storage_ix, storage) + storeSymbol(literal_enc, uint(input[pos&mask]), storage_ix, storage) pos++ } } else { @@ -1055,7 +1096,7 @@ func storeMetaBlock(input []byte, start_pos uint, length uint, mask uint, prev_b for j = uint(cmd.insert_len_); j != 0; j-- { var context uint = uint(getContext(prev_byte, prev_byte2, literal_context_lut)) var literal byte = input[pos&mask] - storeSymbolWithContext(&literal_enc, uint(literal), context, mb.literal_context_map, storage_ix, storage, literalContextBits) + storeSymbolWithContext(literal_enc, uint(literal), context, mb.literal_context_map, storage_ix, storage, literalContextBits) prev_byte2 = prev_byte prev_byte = literal pos++ @@ -1071,10 +1112,10 @@ func storeMetaBlock(input []byte, start_pos uint, length uint, mask uint, prev_b var distnumextra uint32 = uint32(cmd.dist_prefix_) >> 10 var distextra uint64 = uint64(cmd.dist_extra_) if mb.distance_context_map_size == 0 { - storeSymbol(&distance_enc, dist_code, storage_ix, storage) + storeSymbol(distance_enc, dist_code, storage_ix, storage) } else { var context uint = uint(commandDistanceContext(&cmd)) - storeSymbolWithContext(&distance_enc, dist_code, context, mb.distance_context_map, storage_ix, storage, distanceContextBits) + storeSymbolWithContext(distance_enc, dist_code, context, mb.distance_context_map, storage_ix, storage, distanceContextBits) } writeBits(uint(distnumextra), distextra, storage_ix, storage) @@ -1082,19 +1123,17 @@ func storeMetaBlock(input []byte, start_pos uint, length uint, mask uint, prev_b } } - cleanupBlockEncoder(&distance_enc) - cleanupBlockEncoder(&command_enc) - cleanupBlockEncoder(&literal_enc) + cleanupBlockEncoder(distance_enc) + cleanupBlockEncoder(command_enc) + cleanupBlockEncoder(literal_enc) if is_last { jumpToByteBoundary(storage_ix, storage) } } -func buildHistograms(input []byte, start_pos uint, mask uint, commands []command, n_commands uint, lit_histo *histogramLiteral, cmd_histo *histogramCommand, dist_histo *histogramDistance) { +func buildHistograms(input []byte, start_pos uint, mask uint, commands []command, lit_histo *histogramLiteral, cmd_histo *histogramCommand, dist_histo *histogramDistance) { var pos uint = start_pos - var i uint - for i = 0; i < n_commands; i++ { - var cmd command = commands[i] + for _, cmd := range commands { var j uint histogramAddCommand(cmd_histo, uint(cmd.cmd_prefix_)) for j = uint(cmd.insert_len_); j != 0; j-- { @@ -1109,11 +1148,9 @@ func buildHistograms(input []byte, start_pos uint, mask uint, commands []command } } -func storeDataWithHuffmanCodes(input []byte, start_pos uint, mask uint, commands []command, n_commands uint, lit_depth []byte, lit_bits []uint16, cmd_depth []byte, cmd_bits []uint16, dist_depth []byte, dist_bits []uint16, storage_ix *uint, storage []byte) { +func storeDataWithHuffmanCodes(input []byte, start_pos uint, mask uint, commands []command, lit_depth []byte, lit_bits []uint16, cmd_depth []byte, cmd_bits []uint16, dist_depth []byte, dist_bits []uint16, storage_ix *uint, storage []byte) { var pos uint = start_pos - var i uint - for i = 0; i < n_commands; i++ { - var cmd command = commands[i] + for _, cmd := range commands { var cmd_code uint = uint(cmd.cmd_prefix_) var j uint writeBits(uint(cmd_depth[cmd_code]), uint64(cmd_bits[cmd_code]), storage_ix, storage) @@ -1135,7 +1172,7 @@ func storeDataWithHuffmanCodes(input []byte, start_pos uint, mask uint, commands } } -func storeMetaBlockTrivial(input []byte, start_pos uint, length uint, mask uint, is_last bool, params *encoderParams, commands []command, n_commands uint, storage_ix *uint, storage []byte) { +func storeMetaBlockTrivial(input []byte, start_pos uint, length uint, mask uint, is_last bool, params *encoderParams, commands []command, storage_ix *uint, storage []byte) { var lit_histo histogramLiteral var cmd_histo histogramCommand var dist_histo histogramDistance @@ -1154,7 +1191,7 @@ func storeMetaBlockTrivial(input []byte, start_pos uint, length uint, mask uint, histogramClearCommand(&cmd_histo) histogramClearDistance(&dist_histo) - buildHistograms(input, start_pos, mask, commands, n_commands, &lit_histo, &cmd_histo, &dist_histo) + buildHistograms(input, start_pos, mask, commands, &lit_histo, &cmd_histo, &dist_histo) writeBits(13, 0, storage_ix, storage) @@ -1163,13 +1200,13 @@ func storeMetaBlockTrivial(input []byte, start_pos uint, length uint, mask uint, buildAndStoreHuffmanTree(cmd_histo.data_[:], numCommandSymbols, numCommandSymbols, tree, cmd_depth[:], cmd_bits[:], storage_ix, storage) buildAndStoreHuffmanTree(dist_histo.data_[:], maxSimpleDistanceAlphabetSize, uint(num_distance_symbols), tree, dist_depth[:], dist_bits[:], storage_ix, storage) tree = nil - storeDataWithHuffmanCodes(input, start_pos, mask, commands, n_commands, lit_depth[:], lit_bits[:], cmd_depth[:], cmd_bits[:], dist_depth[:], dist_bits[:], storage_ix, storage) + storeDataWithHuffmanCodes(input, start_pos, mask, commands, lit_depth[:], lit_bits[:], cmd_depth[:], cmd_bits[:], dist_depth[:], dist_bits[:], storage_ix, storage) if is_last { jumpToByteBoundary(storage_ix, storage) } } -func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is_last bool, params *encoderParams, commands []command, n_commands uint, storage_ix *uint, storage []byte) { +func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is_last bool, params *encoderParams, commands []command, storage_ix *uint, storage []byte) { var num_distance_symbols uint32 = params.dist.alphabet_size var distance_alphabet_bits uint32 = log2FloorNonZero(uint(num_distance_symbols-1)) + 1 @@ -1177,15 +1214,13 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is writeBits(13, 0, storage_ix, storage) - if n_commands <= 128 { + if len(commands) <= 128 { var histogram = [numLiteralSymbols]uint32{0} var pos uint = start_pos var num_literals uint = 0 - var i uint var lit_depth [numLiteralSymbols]byte var lit_bits [numLiteralSymbols]uint16 - for i = 0; i < n_commands; i++ { - var cmd command = commands[i] + for _, cmd := range commands { var j uint for j = uint(cmd.insert_len_); j != 0; j-- { histogram[input[pos&mask]]++ @@ -1201,7 +1236,7 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is storeStaticCommandHuffmanTree(storage_ix, storage) storeStaticDistanceHuffmanTree(storage_ix, storage) - storeDataWithHuffmanCodes(input, start_pos, mask, commands, n_commands, lit_depth[:], lit_bits[:], kStaticCommandCodeDepth[:], kStaticCommandCodeBits[:], kStaticDistanceCodeDepth[:], kStaticDistanceCodeBits[:], storage_ix, storage) + storeDataWithHuffmanCodes(input, start_pos, mask, commands, lit_depth[:], lit_bits[:], kStaticCommandCodeDepth[:], kStaticCommandCodeBits[:], kStaticDistanceCodeDepth[:], kStaticDistanceCodeBits[:], storage_ix, storage) } else { var lit_histo histogramLiteral var cmd_histo histogramCommand @@ -1215,7 +1250,7 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is histogramClearLiteral(&lit_histo) histogramClearCommand(&cmd_histo) histogramClearDistance(&dist_histo) - buildHistograms(input, start_pos, mask, commands, n_commands, &lit_histo, &cmd_histo, &dist_histo) + buildHistograms(input, start_pos, mask, commands, &lit_histo, &cmd_histo, &dist_histo) buildAndStoreHuffmanTreeFast(lit_histo.data_[:], lit_histo.total_count_, /* max_bits = */ 8, lit_depth[:], lit_bits[:], storage_ix, storage) @@ -1225,7 +1260,7 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is buildAndStoreHuffmanTreeFast(dist_histo.data_[:], dist_histo.total_count_, /* max_bits = */ uint(distance_alphabet_bits), dist_depth[:], dist_bits[:], storage_ix, storage) - storeDataWithHuffmanCodes(input, start_pos, mask, commands, n_commands, lit_depth[:], lit_bits[:], cmd_depth[:], cmd_bits[:], dist_depth[:], dist_bits[:], storage_ix, storage) + storeDataWithHuffmanCodes(input, start_pos, mask, commands, lit_depth[:], lit_bits[:], cmd_depth[:], cmd_bits[:], dist_depth[:], dist_bits[:], storage_ix, storage) } if is_last { diff --git a/vendor/github.com/andybalholm/brotli/cluster_command.go b/vendor/github.com/andybalholm/brotli/cluster_command.go index 7449751..45b569b 100644 --- a/vendor/github.com/andybalholm/brotli/cluster_command.go +++ b/vendor/github.com/andybalholm/brotli/cluster_command.go @@ -1,7 +1,5 @@ package brotli -import "math" - /* Copyright 2013 Google Inc. All Rights Reserved. Distributed under MIT license. @@ -164,163 +162,3 @@ func histogramBitCostDistanceCommand(histogram *histogramCommand, candidate *his return populationCostCommand(&tmp) - candidate.bit_cost_ } } - -/* Find the best 'out' histogram for each of the 'in' histograms. - When called, clusters[0..num_clusters) contains the unique values from - symbols[0..in_size), but this property is not preserved in this function. - Note: we assume that out[]->bit_cost_ is already up-to-date. */ -func histogramRemapCommand(in []histogramCommand, in_size uint, clusters []uint32, num_clusters uint, out []histogramCommand, symbols []uint32) { - var i uint - for i = 0; i < in_size; i++ { - var best_out uint32 - if i == 0 { - best_out = symbols[0] - } else { - best_out = symbols[i-1] - } - var best_bits float64 = histogramBitCostDistanceCommand(&in[i], &out[best_out]) - var j uint - for j = 0; j < num_clusters; j++ { - var cur_bits float64 = histogramBitCostDistanceCommand(&in[i], &out[clusters[j]]) - if cur_bits < best_bits { - best_bits = cur_bits - best_out = clusters[j] - } - } - - symbols[i] = best_out - } - - /* Recompute each out based on raw and symbols. */ - for i = 0; i < num_clusters; i++ { - histogramClearCommand(&out[clusters[i]]) - } - - for i = 0; i < in_size; i++ { - histogramAddHistogramCommand(&out[symbols[i]], &in[i]) - } -} - -/* Reorders elements of the out[0..length) array and changes values in - symbols[0..length) array in the following way: - * when called, symbols[] contains indexes into out[], and has N unique - values (possibly N < length) - * on return, symbols'[i] = f(symbols[i]) and - out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length, - where f is a bijection between the range of symbols[] and [0..N), and - the first occurrences of values in symbols'[i] come in consecutive - increasing order. - Returns N, the number of unique values in symbols[]. */ - -var histogramReindexCommand_kInvalidIndex uint32 = math.MaxUint32 - -func histogramReindexCommand(out []histogramCommand, symbols []uint32, length uint) uint { - var new_index []uint32 = make([]uint32, length) - var next_index uint32 - var tmp []histogramCommand - var i uint - for i = 0; i < length; i++ { - new_index[i] = histogramReindexCommand_kInvalidIndex - } - - next_index = 0 - for i = 0; i < length; i++ { - if new_index[symbols[i]] == histogramReindexCommand_kInvalidIndex { - new_index[symbols[i]] = next_index - next_index++ - } - } - - /* TODO: by using idea of "cycle-sort" we can avoid allocation of - tmp and reduce the number of copying by the factor of 2. */ - tmp = make([]histogramCommand, next_index) - - next_index = 0 - for i = 0; i < length; i++ { - if new_index[symbols[i]] == next_index { - tmp[next_index] = out[symbols[i]] - next_index++ - } - - symbols[i] = new_index[symbols[i]] - } - - new_index = nil - for i = 0; uint32(i) < next_index; i++ { - out[i] = tmp[i] - } - - tmp = nil - return uint(next_index) -} - -func clusterHistogramsCommand(in []histogramCommand, in_size uint, max_histograms uint, out []histogramCommand, out_size *uint, histogram_symbols []uint32) { - var cluster_size []uint32 = make([]uint32, in_size) - var clusters []uint32 = make([]uint32, in_size) - var num_clusters uint = 0 - var max_input_histograms uint = 64 - var pairs_capacity uint = max_input_histograms * max_input_histograms / 2 - var pairs []histogramPair = make([]histogramPair, (pairs_capacity + 1)) - var i uint - - /* For the first pass of clustering, we allow all pairs. */ - for i = 0; i < in_size; i++ { - cluster_size[i] = 1 - } - - for i = 0; i < in_size; i++ { - out[i] = in[i] - out[i].bit_cost_ = populationCostCommand(&in[i]) - histogram_symbols[i] = uint32(i) - } - - for i = 0; i < in_size; i += max_input_histograms { - var num_to_combine uint = brotli_min_size_t(in_size-i, max_input_histograms) - var num_new_clusters uint - var j uint - for j = 0; j < num_to_combine; j++ { - clusters[num_clusters+j] = uint32(i + j) - } - - num_new_clusters = histogramCombineCommand(out, cluster_size, histogram_symbols[i:], clusters[num_clusters:], pairs, num_to_combine, num_to_combine, max_histograms, pairs_capacity) - num_clusters += num_new_clusters - } - { - /* For the second pass, we limit the total number of histogram pairs. - After this limit is reached, we only keep searching for the best pair. */ - var max_num_pairs uint = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters) - if pairs_capacity < (max_num_pairs + 1) { - var _new_size uint - if pairs_capacity == 0 { - _new_size = max_num_pairs + 1 - } else { - _new_size = pairs_capacity - } - var new_array []histogramPair - for _new_size < (max_num_pairs + 1) { - _new_size *= 2 - } - new_array = make([]histogramPair, _new_size) - if pairs_capacity != 0 { - copy(new_array, pairs[:pairs_capacity]) - } - - pairs = new_array - pairs_capacity = _new_size - } - - /* Collapse similar histograms. */ - num_clusters = histogramCombineCommand(out, cluster_size, histogram_symbols, clusters, pairs, num_clusters, in_size, max_histograms, max_num_pairs) - } - - pairs = nil - cluster_size = nil - - /* Find the optimal map from original histograms to the final ones. */ - histogramRemapCommand(in, in_size, clusters, num_clusters, out, histogram_symbols) - - clusters = nil - - /* Convert the context map to a canonical form. */ - *out_size = histogramReindexCommand(out, histogram_symbols, in_size) -} diff --git a/vendor/github.com/andybalholm/brotli/command.go b/vendor/github.com/andybalholm/brotli/command.go index e93ccdf..b1662a5 100644 --- a/vendor/github.com/andybalholm/brotli/command.go +++ b/vendor/github.com/andybalholm/brotli/command.go @@ -194,26 +194,28 @@ type command struct { } /* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */ -func initCommand(self *command, dist *distanceParams, insertlen uint, copylen uint, copylen_code_delta int, distance_code uint) { +func makeCommand(dist *distanceParams, insertlen uint, copylen uint, copylen_code_delta int, distance_code uint) (cmd command) { /* Don't rely on signed int representation, use honest casts. */ var delta uint32 = uint32(byte(int8(copylen_code_delta))) - self.insert_len_ = uint32(insertlen) - self.copy_len_ = uint32(uint32(copylen) | delta<<25) + cmd.insert_len_ = uint32(insertlen) + cmd.copy_len_ = uint32(uint32(copylen) | delta<<25) /* The distance prefix and extra bits are stored in this Command as if npostfix and ndirect were 0, they are only recomputed later after the clustering if needed. */ - prefixEncodeCopyDistance(distance_code, uint(dist.num_direct_distance_codes), uint(dist.distance_postfix_bits), &self.dist_prefix_, &self.dist_extra_) + prefixEncodeCopyDistance(distance_code, uint(dist.num_direct_distance_codes), uint(dist.distance_postfix_bits), &cmd.dist_prefix_, &cmd.dist_extra_) + getLengthCode(insertlen, uint(int(copylen)+copylen_code_delta), (cmd.dist_prefix_&0x3FF == 0), &cmd.cmd_prefix_) - getLengthCode(insertlen, uint(int(copylen)+copylen_code_delta), (self.dist_prefix_&0x3FF == 0), &self.cmd_prefix_) + return cmd } -func initInsertCommand(self *command, insertlen uint) { - self.insert_len_ = uint32(insertlen) - self.copy_len_ = 4 << 25 - self.dist_extra_ = 0 - self.dist_prefix_ = numDistanceShortCodes - getLengthCode(insertlen, 4, false, &self.cmd_prefix_) +func makeInsertCommand(insertlen uint) (cmd command) { + cmd.insert_len_ = uint32(insertlen) + cmd.copy_len_ = 4 << 25 + cmd.dist_extra_ = 0 + cmd.dist_prefix_ = numDistanceShortCodes + getLengthCode(insertlen, 4, false, &cmd.cmd_prefix_) + return cmd } func commandRestoreDistanceCode(self *command, dist *distanceParams) uint32 { diff --git a/vendor/github.com/andybalholm/brotli/compress_fragment.go b/vendor/github.com/andybalholm/brotli/compress_fragment.go index 435898e..c9bd057 100644 --- a/vendor/github.com/andybalholm/brotli/compress_fragment.go +++ b/vendor/github.com/andybalholm/brotli/compress_fragment.go @@ -33,14 +33,8 @@ func hashBytesAtOffset5(v uint64, offset int, shift uint) uint32 { } func isMatch5(p1 []byte, p2 []byte) bool { - var i int - for i = 0; i < 5; i++ { - if p1[i] != p2[i] { - return false - } - } - - return true + return binary.LittleEndian.Uint32(p1) == binary.LittleEndian.Uint32(p2) && + p1[4] == p2[4] } /* Builds a literal prefix code into "depths" and "bits" based on the statistics @@ -610,7 +604,7 @@ emit_commands: assert(candidate < ip) table[hash] = int(ip - base_ip) - if !(!isMatch5(in[ip:], in[candidate:])) { + if isMatch5(in[ip:], in[candidate:]) { break } } diff --git a/vendor/github.com/andybalholm/brotli/compress_fragment_two_pass.go b/vendor/github.com/andybalholm/brotli/compress_fragment_two_pass.go index ffeb321..172dc7f 100644 --- a/vendor/github.com/andybalholm/brotli/compress_fragment_two_pass.go +++ b/vendor/github.com/andybalholm/brotli/compress_fragment_two_pass.go @@ -30,14 +30,13 @@ func hashBytesAtOffset(v uint64, offset uint, shift uint, length uint) uint32 { } func isMatch1(p1 []byte, p2 []byte, length uint) bool { - var i uint - for i = 0; i < length && i < 6; i++ { - if p1[i] != p2[i] { - return false - } + if binary.LittleEndian.Uint32(p1) != binary.LittleEndian.Uint32(p2) { + return false } - - return true + if length == 4 { + return true + } + return p1[4] == p2[4] && p1[5] == p2[5] } /* Builds a command and distance prefix code (each 64 symbols) into "depth" and diff --git a/vendor/github.com/andybalholm/brotli/decode.go b/vendor/github.com/andybalholm/brotli/decode.go index d2f39a0..6a73b88 100644 --- a/vendor/github.com/andybalholm/brotli/decode.go +++ b/vendor/github.com/andybalholm/brotli/decode.go @@ -50,21 +50,6 @@ const ( decoderErrorUnreachable = -31 ) -/** - * The value of the last error code, negative integer. - * - * All other error code values are in the range from ::lastErrorCode - * to @c -1. There are also 4 other possible non-error codes @c 0 .. @c 3 in - * ::BrotliDecoderErrorCode enumeration. - */ -const lastErrorCode = decoderErrorUnreachable - -/** Options to be used with ::BrotliDecoderSetParameter. */ -const ( - decoderParamDisableRingBufferReallocation = 0 - decoderParamLargeWindow = 1 -) - const huffmanTableBits = 8 const huffmanTableMask = 0xFF @@ -81,28 +66,6 @@ var kCodeLengthPrefixLength = [16]byte{2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, var kCodeLengthPrefixValue = [16]byte{0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5} -func decoderSetParameter(state *Reader, p int, value uint32) bool { - if state.state != stateUninited { - return false - } - switch p { - case decoderParamDisableRingBufferReallocation: - if !(value == 0) { - state.canny_ringbuffer_allocation = 0 - } else { - state.canny_ringbuffer_allocation = 1 - } - return true - - case decoderParamLargeWindow: - state.large_window = (!(value == 0)) - return true - - default: - return false - } -} - /* Saves error code and converts it to BrotliDecoderResult. */ func saveErrorCode(s *Reader, e int) int { s.error_code = int(e) @@ -1125,10 +1088,8 @@ func decodeContextMap(context_map_size uint32, num_htrees *uint32, context_map_a Reads 3..54 bits. */ func decodeBlockTypeAndLength(safe int, s *Reader, tree_type int) bool { var max_block_type uint32 = s.num_block_types[tree_type] - var type_tree []huffmanCode - type_tree = s.block_type_trees[tree_type*huffmanMaxSize258:] - var len_tree []huffmanCode - len_tree = s.block_len_trees[tree_type*huffmanMaxSize26:] + type_tree := s.block_type_trees[tree_type*huffmanMaxSize258:] + len_tree := s.block_len_trees[tree_type*huffmanMaxSize26:] var br *bitReader = &s.br var ringbuffer []uint32 = s.block_type_rb[tree_type*2:] var block_type uint32 @@ -1280,8 +1241,7 @@ func unwrittenBytes(s *Reader, wrap bool) uint { Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push and either ring-buffer is as big as window size, or |force| is true. */ func writeRingBuffer(s *Reader, available_out *uint, next_out *[]byte, total_out *uint, force bool) int { - var start []byte - start = s.ringbuffer[s.partial_pos_out&uint(s.ringbuffer_mask):] + start := s.ringbuffer[s.partial_pos_out&uint(s.ringbuffer_mask):] var to_write uint = unwrittenBytes(s, true) var num_written uint = *available_out if num_written > to_write { @@ -1412,8 +1372,7 @@ func copyUncompressedBlockToOutput(available_out *uint, next_out *[]byte, total_ case stateUncompressedWrite: { - var result int - result = writeRingBuffer(s, available_out, next_out, total_out, false) + result := writeRingBuffer(s, available_out, next_out, total_out, false) if result != decoderSuccess { return result } @@ -1931,8 +1890,7 @@ CommandPostDecodeLiterals: } if transform_idx < int(trans.num_transforms) { - var word []byte - word = words.data[offset:] + word := words.data[offset:] var len int = i if transform_idx == int(trans.cutOffTransforms[0]) { copy(s.ringbuffer[pos:], word[:uint(len)]) @@ -1954,10 +1912,8 @@ CommandPostDecodeLiterals: } } else { var src_start int = (pos - s.distance_code) & s.ringbuffer_mask - var copy_dst []byte - copy_dst = s.ringbuffer[pos:] - var copy_src []byte - copy_src = s.ringbuffer[src_start:] + copy_dst := s.ringbuffer[pos:] + copy_src := s.ringbuffer[src_start:] var dst_end int = pos + i var src_end int = src_start + i @@ -2494,8 +2450,6 @@ func decoderDecompressStream(s *Reader, available_in *uint, next_in *[]byte, ava } else { s.state = stateCommandBegin } - - break } else if s.state == stateCommandPostWrite2 { s.state = stateCommandPostWrapCopy /* BROTLI_STATE_COMMAND_INNER_WRITE */ } else { diff --git a/vendor/github.com/andybalholm/brotli/encode.go b/vendor/github.com/andybalholm/brotli/encode.go index c01322b..8e25a4e 100644 --- a/vendor/github.com/andybalholm/brotli/encode.go +++ b/vendor/github.com/andybalholm/brotli/encode.go @@ -74,14 +74,13 @@ const ( type Writer struct { dst io.Writer options WriterOptions + err error params encoderParams hasher_ hasherHandle input_pos_ uint64 ringbuffer_ ringBuffer - cmd_alloc_size_ uint - commands_ []command - num_commands_ uint + commands []command num_literals_ uint last_insert_len_ uint last_flush_pos_ uint64 @@ -92,8 +91,7 @@ type Writer struct { last_bytes_bits_ byte prev_byte_ byte prev_byte2_ byte - storage_size_ uint - storage_ []byte + storage []byte small_table_ [1 << 10]int large_table_ []int large_table_size_ uint @@ -103,9 +101,6 @@ type Writer struct { cmd_code_numbits_ uint command_buf_ []uint32 literal_buf_ []byte - next_out_ []byte - available_out_ uint - total_out_ uint tiny_buf_ struct { u64 [2]uint64 u8 [16]byte @@ -146,14 +141,12 @@ func wrapPosition(position uint64) uint32 { return result } -func getBrotliStorage(s *Writer, size uint) []byte { - if s.storage_size_ < size { - s.storage_ = nil - s.storage_ = make([]byte, size) - s.storage_size_ = size +func (s *Writer) getStorage(size int) []byte { + if len(s.storage) < size { + s.storage = make([]byte, size) } - return s.storage_ + return s.storage } func hashTableSize(max_table_size uint, input_size uint) uint { @@ -222,332 +215,6 @@ func encodeWindowBits(lgwin int, large_window bool, last_bytes *uint16, last_byt } } -/* Initializes the command and distance prefix codes for the first block. */ - -var initCommandPrefixCodes_kDefaultCommandDepths = [128]byte{ - 0, - 4, - 4, - 5, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 0, - 0, - 0, - 4, - 4, - 4, - 4, - 4, - 5, - 5, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 10, - 10, - 10, - 10, - 10, - 10, - 0, - 4, - 4, - 5, - 5, - 5, - 6, - 6, - 7, - 8, - 8, - 9, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 5, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 6, - 6, - 6, - 6, - 6, - 6, - 5, - 5, - 5, - 5, - 5, - 5, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 5, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 7, - 7, - 7, - 8, - 10, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, -} -var initCommandPrefixCodes_kDefaultCommandBits = [128]uint16{ - 0, - 0, - 8, - 9, - 3, - 35, - 7, - 71, - 39, - 103, - 23, - 47, - 175, - 111, - 239, - 31, - 0, - 0, - 0, - 4, - 12, - 2, - 10, - 6, - 13, - 29, - 11, - 43, - 27, - 59, - 87, - 55, - 15, - 79, - 319, - 831, - 191, - 703, - 447, - 959, - 0, - 14, - 1, - 25, - 5, - 21, - 19, - 51, - 119, - 159, - 95, - 223, - 479, - 991, - 63, - 575, - 127, - 639, - 383, - 895, - 255, - 767, - 511, - 1023, - 14, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 27, - 59, - 7, - 39, - 23, - 55, - 30, - 1, - 17, - 9, - 25, - 5, - 0, - 8, - 4, - 12, - 2, - 10, - 6, - 21, - 13, - 29, - 3, - 19, - 11, - 15, - 47, - 31, - 95, - 63, - 127, - 255, - 767, - 2815, - 1791, - 3839, - 511, - 2559, - 1535, - 3583, - 1023, - 3071, - 2047, - 4095, -} -var initCommandPrefixCodes_kDefaultCommandCode = []byte{ - 0xff, - 0x77, - 0xd5, - 0xbf, - 0xe7, - 0xde, - 0xea, - 0x9e, - 0x51, - 0x5d, - 0xde, - 0xc6, - 0x70, - 0x57, - 0xbc, - 0x58, - 0x58, - 0x58, - 0xd8, - 0xd8, - 0x58, - 0xd5, - 0xcb, - 0x8c, - 0xea, - 0xe0, - 0xc3, - 0x87, - 0x1f, - 0x83, - 0xc1, - 0x60, - 0x1c, - 0x67, - 0xb2, - 0xaa, - 0x06, - 0x83, - 0xc1, - 0x60, - 0x30, - 0x18, - 0xcc, - 0xa1, - 0xce, - 0x88, - 0x54, - 0x94, - 0x46, - 0xe1, - 0xb0, - 0xd0, - 0x4e, - 0xb2, - 0xf7, - 0x04, - 0x00, -} -var initCommandPrefixCodes_kDefaultCommandCodeNumBits uint = 448 - -func initCommandPrefixCodes(cmd_depths []byte, cmd_bits []uint16, cmd_code []byte, cmd_code_numbits *uint) { - copy(cmd_depths, initCommandPrefixCodes_kDefaultCommandDepths[:]) - copy(cmd_bits, initCommandPrefixCodes_kDefaultCommandBits[:]) - - /* Initialize the pre-compressed form of the command and distance prefix - codes. */ - copy(cmd_code, initCommandPrefixCodes_kDefaultCommandCode) - - *cmd_code_numbits = initCommandPrefixCodes_kDefaultCommandCodeNumBits -} - /* Decide about the context map based on the ability of the prediction ability of the previous byte UTF8-prefix on the next byte. The prediction ability is calculated as Shannon entropy. Here we need @@ -557,136 +224,16 @@ func initCommandPrefixCodes(cmd_depths []byte, cmd_bits []uint16, cmd_code []byt coding. */ var kStaticContextMapContinuation = [64]uint32{ - 1, - 1, - 2, - 2, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, + 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } var kStaticContextMapSimpleUTF8 = [64]uint32{ - 0, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, + 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } func chooseContextMap(quality int, bigram_histo []uint32, num_literal_contexts *uint, literal_context_map *[]uint32) { @@ -738,70 +285,22 @@ func chooseContextMap(quality int, bigram_histo []uint32, num_literal_contexts * first 5 bits of literals. */ var kStaticContextMapComplexUTF8 = [64]uint32{ - 11, - 11, - 12, - 12, - 0, - 0, - 0, - 0, - 1, - 1, - 9, - 9, - 2, - 2, - 2, - 2, - 1, - 1, - 1, - 1, - 8, - 3, - 3, - 3, - 1, - 1, - 1, - 1, - 2, - 2, - 2, - 2, - 8, - 4, - 4, - 4, - 8, - 7, - 4, - 4, - 8, - 0, - 0, - 0, - 3, - 3, - 3, - 3, - 5, - 5, - 10, - 5, - 5, - 5, - 10, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, + 11, 11, 12, 12, /* 0 special */ + 0, 0, 0, 0, /* 4 lf */ + 1, 1, 9, 9, /* 8 space */ + 2, 2, 2, 2, /* !, first after space/lf and after something else. */ + 1, 1, 1, 1, /* " */ + 8, 3, 3, 3, /* % */ + 1, 1, 1, 1, /* ({[ */ + 2, 2, 2, 2, /* }]) */ + 8, 4, 4, 4, /* :; */ + 8, 7, 4, 4, /* . */ + 8, 0, 0, 0, /* > */ + 3, 3, 3, 3, /* [0..9] */ + 5, 5, 10, 5, /* [A-Z] */ + 5, 5, 10, 5, + 6, 6, 6, 6, /* [a-z] */ + 6, 6, 6, 6, } func shouldUseComplexStaticContextMap(input []byte, start_pos uint, length uint, mask uint, quality int, size_hint uint, num_literal_contexts *uint, literal_context_map *[]uint32) bool { @@ -933,7 +432,7 @@ func chooseContextMode(params *encoderParams, data []byte, pos uint, mask uint, return contextUTF8 } -func writeMetaBlockInternal(data []byte, mask uint, last_flush_pos uint64, bytes uint, is_last bool, literal_context_mode int, params *encoderParams, prev_byte byte, prev_byte2 byte, num_literals uint, num_commands uint, commands []command, saved_dist_cache []int, dist_cache []int, storage_ix *uint, storage []byte) { +func writeMetaBlockInternal(data []byte, mask uint, last_flush_pos uint64, bytes uint, is_last bool, literal_context_mode int, params *encoderParams, prev_byte byte, prev_byte2 byte, num_literals uint, commands []command, saved_dist_cache []int, dist_cache []int, storage_ix *uint, storage []byte) { var wrapped_last_flush_pos uint32 = wrapPosition(last_flush_pos) var last_bytes uint16 var last_bytes_bits byte @@ -948,7 +447,7 @@ func writeMetaBlockInternal(data []byte, mask uint, last_flush_pos uint64, bytes return } - if !shouldCompress_encode(data, mask, last_flush_pos, bytes, num_literals, num_commands) { + if !shouldCompress_encode(data, mask, last_flush_pos, bytes, num_literals, uint(len(commands))) { /* Restore the distance cache, as its last update by CreateBackwardReferences is now unused. */ copy(dist_cache, saved_dist_cache[:4]) @@ -961,12 +460,11 @@ func writeMetaBlockInternal(data []byte, mask uint, last_flush_pos uint64, bytes last_bytes = uint16(storage[1])<<8 | uint16(storage[0]) last_bytes_bits = byte(*storage_ix) if params.quality <= maxQualityForStaticEntropyCodes { - storeMetaBlockFast(data, uint(wrapped_last_flush_pos), bytes, mask, is_last, params, commands, num_commands, storage_ix, storage) + storeMetaBlockFast(data, uint(wrapped_last_flush_pos), bytes, mask, is_last, params, commands, storage_ix, storage) } else if params.quality < minQualityForBlockSplit { - storeMetaBlockTrivial(data, uint(wrapped_last_flush_pos), bytes, mask, is_last, params, commands, num_commands, storage_ix, storage) + storeMetaBlockTrivial(data, uint(wrapped_last_flush_pos), bytes, mask, is_last, params, commands, storage_ix, storage) } else { - var mb metaBlockSplit - initMetaBlockSplit(&mb) + mb := getMetaBlockSplit() if params.quality < minQualityForHqBlockSplitting { var num_literal_contexts uint = 1 var literal_context_map []uint32 = nil @@ -974,9 +472,9 @@ func writeMetaBlockInternal(data []byte, mask uint, last_flush_pos uint64, bytes decideOverLiteralContextModeling(data, uint(wrapped_last_flush_pos), bytes, mask, params.quality, params.size_hint, &num_literal_contexts, &literal_context_map) } - buildMetaBlockGreedy(data, uint(wrapped_last_flush_pos), mask, prev_byte, prev_byte2, literal_context_lut, num_literal_contexts, literal_context_map, commands, num_commands, &mb) + buildMetaBlockGreedy(data, uint(wrapped_last_flush_pos), mask, prev_byte, prev_byte2, literal_context_lut, num_literal_contexts, literal_context_map, commands, mb) } else { - buildMetaBlock(data, uint(wrapped_last_flush_pos), mask, &block_params, prev_byte, prev_byte2, commands, num_commands, literal_context_mode, &mb) + buildMetaBlock(data, uint(wrapped_last_flush_pos), mask, &block_params, prev_byte, prev_byte2, commands, literal_context_mode, mb) } if params.quality >= minQualityForOptimizeHistograms { @@ -988,11 +486,11 @@ func writeMetaBlockInternal(data []byte, mask uint, last_flush_pos uint64, bytes num_effective_dist_codes = numHistogramDistanceSymbols } - optimizeHistograms(num_effective_dist_codes, &mb) + optimizeHistograms(num_effective_dist_codes, mb) } - storeMetaBlock(data, uint(wrapped_last_flush_pos), bytes, mask, prev_byte, prev_byte2, is_last, &block_params, literal_context_mode, commands, num_commands, &mb, storage_ix, storage) - destroyMetaBlockSplit(&mb) + storeMetaBlock(data, uint(wrapped_last_flush_pos), bytes, mask, prev_byte, prev_byte2, is_last, &block_params, literal_context_mode, commands, mb, storage_ix, storage) + freeMetaBlockSplit(mb) } if bytes+4 < *storage_ix>>3 { @@ -1056,7 +554,38 @@ func ensureInitialized(s *Writer) bool { } if s.params.quality == fastOnePassCompressionQuality { - initCommandPrefixCodes(s.cmd_depths_[:], s.cmd_bits_[:], s.cmd_code_[:], &s.cmd_code_numbits_) + s.cmd_depths_ = [128]byte{ + 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, + 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, + 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6, + 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, + 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + } + s.cmd_bits_ = [128]uint16{ + 0, 0, 8, 9, 3, 35, 7, 71, + 39, 103, 23, 47, 175, 111, 239, 31, + 0, 0, 0, 4, 12, 2, 10, 6, + 13, 29, 11, 43, 27, 59, 87, 55, + 15, 79, 319, 831, 191, 703, 447, 959, + 0, 14, 1, 25, 5, 21, 19, 51, + 119, 159, 95, 223, 479, 991, 63, 575, + 127, 639, 383, 895, 255, 767, 511, 1023, + 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12, + 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255, + 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095, + } + s.cmd_code_ = [512]byte{ + 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6, + 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c, + 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa, + 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94, + 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00, + } + s.cmd_code_numbits_ = 448 } s.is_initialized_ = true @@ -1081,33 +610,23 @@ func encoderInitParams(params *encoderParams) { func encoderInitState(s *Writer) { encoderInitParams(&s.params) s.input_pos_ = 0 - s.num_commands_ = 0 + s.commands = s.commands[:0] s.num_literals_ = 0 s.last_insert_len_ = 0 s.last_flush_pos_ = 0 s.last_processed_pos_ = 0 s.prev_byte_ = 0 s.prev_byte2_ = 0 - s.storage_size_ = 0 - s.storage_ = nil - s.hasher_ = nil - s.large_table_ = nil - s.large_table_size_ = 0 + if s.hasher_ != nil { + s.hasher_.Common().is_prepared_ = false + } s.cmd_code_numbits_ = 0 - s.command_buf_ = nil - s.literal_buf_ = nil - s.next_out_ = nil - s.available_out_ = 0 - s.total_out_ = 0 s.stream_state_ = streamProcessing s.is_last_block_emitted_ = false s.is_initialized_ = false ringBufferInit(&s.ringbuffer_) - s.commands_ = nil - s.cmd_alloc_size_ = 0 - /* Initialize distance cache. */ s.dist_cache_[0] = 4 @@ -1190,7 +709,7 @@ func updateLastProcessedPos(s *Writer) bool { } func extendLastCommand(s *Writer, bytes *uint32, wrapped_last_processed_pos *uint32) { - var last_command *command = &s.commands_[s.num_commands_-1] + var last_command *command = &s.commands[len(s.commands)-1] var data []byte = s.ringbuffer_.buffer_ var mask uint32 = s.ringbuffer_.mask_ var max_backward_distance uint64 = ((uint64(1)) << s.params.lgwin) - windowGap @@ -1219,18 +738,17 @@ func extendLastCommand(s *Writer, bytes *uint32, wrapped_last_processed_pos *uin } /* - Processes the accumulated input data and sets |*out_size| to the length of - the new output meta-block, or to zero if no new output meta-block has been - created (in this case the processed input data is buffered internally). - If |*out_size| is positive, |*output| points to the start of the output - data. If |is_last| or |force_flush| is true, an output meta-block is + Processes the accumulated input data and writes + the new output meta-block to s.dest, if one has been + created (otherwise the processed input data is buffered internally). + If |is_last| or |force_flush| is true, an output meta-block is always created. However, until |is_last| is true encoder may retain up to 7 bits of the last byte of output. To force encoder to dump the remaining bits use WriteMetadata() to append an empty meta-data block. Returns false if the size of the input data is larger than input_block_size(). */ -func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, output *[]byte) bool { +func encodeData(s *Writer, is_last bool, force_flush bool) bool { var delta uint64 = unprocessedInputSize(s) var bytes uint32 = uint32(delta) var wrapped_last_processed_pos uint32 = wrapPosition(s.last_processed_pos_) @@ -1253,9 +771,14 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu return false } - if s.params.quality == fastTwoPassCompressionQuality && s.command_buf_ == nil { - s.command_buf_ = make([]uint32, kCompressFragmentTwoPassBlockSize) - s.literal_buf_ = make([]byte, kCompressFragmentTwoPassBlockSize) + if s.params.quality == fastTwoPassCompressionQuality { + if s.command_buf_ == nil || cap(s.command_buf_) < int(kCompressFragmentTwoPassBlockSize) { + s.command_buf_ = make([]uint32, kCompressFragmentTwoPassBlockSize) + s.literal_buf_ = make([]byte, kCompressFragmentTwoPassBlockSize) + } else { + s.command_buf_ = s.command_buf_[:kCompressFragmentTwoPassBlockSize] + s.literal_buf_ = s.literal_buf_[:kCompressFragmentTwoPassBlockSize] + } } if s.params.quality == fastOnePassCompressionQuality || s.params.quality == fastTwoPassCompressionQuality { @@ -1267,12 +790,10 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu if delta == 0 && !is_last { /* We have no new input data and we don't have to finish the stream, so nothing to do. */ - *out_size = 0 - return true } - storage = getBrotliStorage(s, uint(2*bytes+503)) + storage = s.getStorage(int(2*bytes + 503)) storage[0] = byte(s.last_bytes_) storage[1] = byte(s.last_bytes_ >> 8) table = getHashTable(s, s.params.quality, uint(bytes), &table_size) @@ -1285,28 +806,23 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu s.last_bytes_ = uint16(storage[storage_ix>>3]) s.last_bytes_bits_ = byte(storage_ix & 7) updateLastProcessedPos(s) - *output = storage[0:] - *out_size = storage_ix >> 3 + s.writeOutput(storage[:storage_ix>>3]) return true } { /* Theoretical max number of commands is 1 per 2 bytes. */ - var newsize uint = uint(uint32(s.num_commands_) + bytes/2 + 1) - if newsize > s.cmd_alloc_size_ { - var new_commands []command - + newsize := len(s.commands) + int(bytes)/2 + 1 + if newsize > cap(s.commands) { /* Reserve a bit more memory to allow merging with a next block without reallocation: that would impact speed. */ - newsize += uint((bytes / 4) + 16) + newsize += int(bytes/4) + 16 - s.cmd_alloc_size_ = newsize - new_commands = make([]command, newsize) - if s.commands_ != nil { - copy(new_commands, s.commands_[:s.num_commands_]) - s.commands_ = nil + new_commands := make([]command, len(s.commands), newsize) + if s.commands != nil { + copy(new_commands, s.commands) } - s.commands_ = new_commands + s.commands = new_commands } } @@ -1314,46 +830,44 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu literal_context_mode = chooseContextMode(&s.params, data, uint(wrapPosition(s.last_flush_pos_)), uint(mask), uint(s.input_pos_-s.last_flush_pos_)) - if s.num_commands_ != 0 && s.last_insert_len_ == 0 { + if len(s.commands) != 0 && s.last_insert_len_ == 0 { extendLastCommand(s, &bytes, &wrapped_last_processed_pos) } if s.params.quality == zopflificationQuality { assert(s.params.hasher.type_ == 10) - createZopfliBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_.(*h10), s.dist_cache_[:], &s.last_insert_len_, s.commands_[s.num_commands_:], &s.num_commands_, &s.num_literals_) + createZopfliBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_.(*h10), s.dist_cache_[:], &s.last_insert_len_, &s.commands, &s.num_literals_) } else if s.params.quality == hqZopflificationQuality { assert(s.params.hasher.type_ == 10) - createHqZopfliBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_, s.dist_cache_[:], &s.last_insert_len_, s.commands_[s.num_commands_:], &s.num_commands_, &s.num_literals_) + createHqZopfliBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_, s.dist_cache_[:], &s.last_insert_len_, &s.commands, &s.num_literals_) } else { - createBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_, s.dist_cache_[:], &s.last_insert_len_, s.commands_[s.num_commands_:], &s.num_commands_, &s.num_literals_) + createBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_, s.dist_cache_[:], &s.last_insert_len_, &s.commands, &s.num_literals_) } { var max_length uint = maxMetablockSize(&s.params) var max_literals uint = max_length / 8 - var max_commands uint = max_length / 8 + max_commands := int(max_length / 8) var processed_bytes uint = uint(s.input_pos_ - s.last_flush_pos_) var next_input_fits_metablock bool = (processed_bytes+inputBlockSize(s) <= max_length) - var should_flush bool = (s.params.quality < minQualityForBlockSplit && s.num_literals_+s.num_commands_ >= maxNumDelayedSymbols) + var should_flush bool = (s.params.quality < minQualityForBlockSplit && s.num_literals_+uint(len(s.commands)) >= maxNumDelayedSymbols) /* If maximal possible additional block doesn't fit metablock, flush now. */ /* TODO: Postpone decision until next block arrives? */ /* If block splitting is not used, then flush as soon as there is some amount of commands / literals produced. */ - if !is_last && !force_flush && !should_flush && next_input_fits_metablock && s.num_literals_ < max_literals && s.num_commands_ < max_commands { + if !is_last && !force_flush && !should_flush && next_input_fits_metablock && s.num_literals_ < max_literals && len(s.commands) < max_commands { /* Merge with next input block. Everything will happen later. */ if updateLastProcessedPos(s) { hasherReset(s.hasher_) } - *out_size = 0 return true } } /* Create the last insert-only command. */ if s.last_insert_len_ > 0 { - initInsertCommand(&s.commands_[s.num_commands_], s.last_insert_len_) - s.num_commands_++ + s.commands = append(s.commands, makeInsertCommand(s.last_insert_len_)) s.num_literals_ += s.last_insert_len_ s.last_insert_len_ = 0 } @@ -1361,8 +875,6 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu if !is_last && s.input_pos_ == s.last_flush_pos_ { /* We have no new input data and we don't have to finish the stream, so nothing to do. */ - *out_size = 0 - return true } @@ -1371,11 +883,11 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu assert(s.input_pos_-s.last_flush_pos_ <= 1<<24) { var metablock_size uint32 = uint32(s.input_pos_ - s.last_flush_pos_) - var storage []byte = getBrotliStorage(s, uint(2*metablock_size+503)) + var storage []byte = s.getStorage(int(2*metablock_size + 503)) var storage_ix uint = uint(s.last_bytes_bits_) storage[0] = byte(s.last_bytes_) storage[1] = byte(s.last_bytes_ >> 8) - writeMetaBlockInternal(data, uint(mask), s.last_flush_pos_, uint(metablock_size), is_last, literal_context_mode, &s.params, s.prev_byte_, s.prev_byte2_, s.num_literals_, s.num_commands_, s.commands_, s.saved_dist_cache_[:], s.dist_cache_[:], &storage_ix, storage) + writeMetaBlockInternal(data, uint(mask), s.last_flush_pos_, uint(metablock_size), is_last, literal_context_mode, &s.params, s.prev_byte_, s.prev_byte2_, s.num_literals_, s.commands, s.saved_dist_cache_[:], s.dist_cache_[:], &storage_ix, storage) s.last_bytes_ = uint16(storage[storage_ix>>3]) s.last_bytes_bits_ = byte(storage_ix & 7) s.last_flush_pos_ = s.input_pos_ @@ -1391,15 +903,14 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu s.prev_byte2_ = data[uint32(s.last_flush_pos_-2)&mask] } - s.num_commands_ = 0 + s.commands = s.commands[:0] s.num_literals_ = 0 /* Save the state of the distance cache in case we need to restore it for emitting an uncompressed block. */ copy(s.saved_dist_cache_[:], s.dist_cache_[:]) - *output = storage[0:] - *out_size = storage_ix >> 3 + s.writeOutput(storage[:storage_ix>>3]) return true } } @@ -1409,8 +920,7 @@ func encodeData(s *Writer, is_last bool, force_flush bool, out_size *uint, outpu REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long. REQUIRED: |block_size| <= (1 << 24). */ func writeMetadataHeader(s *Writer, block_size uint, header []byte) uint { - var storage_ix uint - storage_ix = uint(s.last_bytes_bits_) + storage_ix := uint(s.last_bytes_bits_) header[0] = byte(s.last_bytes_) header[1] = byte(s.last_bytes_ >> 8) s.last_bytes_ = 0 @@ -1439,7 +949,6 @@ func writeMetadataHeader(s *Writer, block_size uint, header []byte) uint { func injectBytePaddingBlock(s *Writer) { var seal uint32 = uint32(s.last_bytes_) var seal_bits uint = uint(s.last_bytes_bits_) - var destination []byte s.last_bytes_ = 0 s.last_bytes_bits_ = 0 @@ -1448,14 +957,7 @@ func injectBytePaddingBlock(s *Writer) { seal_bits += 6 - /* If we have already created storage, then append to it. - Storage is valid until next block is being compressed. */ - if s.next_out_ != nil { - destination = s.next_out_[s.available_out_:] - } else { - destination = s.tiny_buf_.u8[:] - s.next_out_ = destination - } + destination := s.tiny_buf_.u8[:] destination[0] = byte(seal) if seal_bits > 8 { @@ -1464,42 +966,35 @@ func injectBytePaddingBlock(s *Writer) { if seal_bits > 16 { destination[2] = byte(seal >> 16) } - s.available_out_ += (seal_bits + 7) >> 3 + s.writeOutput(destination[:(seal_bits+7)>>3]) } func checkFlushComplete(s *Writer) { - if s.stream_state_ == streamFlushRequested && s.available_out_ == 0 { + if s.stream_state_ == streamFlushRequested && s.err == nil { s.stream_state_ = streamProcessing - s.next_out_ = nil } } func encoderCompressStreamFast(s *Writer, op int, available_in *uint, next_in *[]byte) bool { var block_size_limit uint = uint(1) << s.params.lgwin var buf_size uint = brotli_min_size_t(kCompressFragmentTwoPassBlockSize, brotli_min_size_t(*available_in, block_size_limit)) - var tmp_command_buf []uint32 = nil var command_buf []uint32 = nil - var tmp_literal_buf []byte = nil var literal_buf []byte = nil if s.params.quality != fastOnePassCompressionQuality && s.params.quality != fastTwoPassCompressionQuality { return false } if s.params.quality == fastTwoPassCompressionQuality { - if s.command_buf_ == nil && buf_size == kCompressFragmentTwoPassBlockSize { - s.command_buf_ = make([]uint32, kCompressFragmentTwoPassBlockSize) - s.literal_buf_ = make([]byte, kCompressFragmentTwoPassBlockSize) - } - - if s.command_buf_ != nil { - command_buf = s.command_buf_ - literal_buf = s.literal_buf_ + if s.command_buf_ == nil || cap(s.command_buf_) < int(buf_size) { + s.command_buf_ = make([]uint32, buf_size) + s.literal_buf_ = make([]byte, buf_size) } else { - tmp_command_buf = make([]uint32, buf_size) - tmp_literal_buf = make([]byte, buf_size) - command_buf = tmp_command_buf - literal_buf = tmp_literal_buf + s.command_buf_ = s.command_buf_[:buf_size] + s.literal_buf_ = s.literal_buf_[:buf_size] } + + command_buf = s.command_buf_ + literal_buf = s.literal_buf_ } for { @@ -1508,10 +1003,10 @@ func encoderCompressStreamFast(s *Writer, op int, available_in *uint, next_in *[ continue } - /* Compress block only when internal output buffer is empty, stream is not + /* Compress block only when stream is not finished, there is no pending flush request, and there is either additional input or pending operation. */ - if s.available_out_ == 0 && s.stream_state_ == streamProcessing && (*available_in != 0 || op != int(operationProcess)) { + if s.stream_state_ == streamProcessing && (*available_in != 0 || op != int(operationProcess)) { var block_size uint = brotli_min_size_t(block_size_limit, *available_in) var is_last bool = (*available_in == block_size) && (op == int(operationFinish)) var force_flush bool = (*available_in == block_size) && (op == int(operationFlush)) @@ -1526,7 +1021,7 @@ func encoderCompressStreamFast(s *Writer, op int, available_in *uint, next_in *[ continue } - storage = getBrotliStorage(s, max_out_size) + storage = s.getStorage(int(max_out_size)) storage[0] = byte(s.last_bytes_) storage[1] = byte(s.last_bytes_ >> 8) @@ -1541,8 +1036,7 @@ func encoderCompressStreamFast(s *Writer, op int, available_in *uint, next_in *[ *next_in = (*next_in)[block_size:] *available_in -= block_size var out_bytes uint = storage_ix >> 3 - s.next_out_ = storage - s.available_out_ = out_bytes + s.writeOutput(storage[:out_bytes]) s.last_bytes_ = uint16(storage[storage_ix>>3]) s.last_bytes_bits_ = byte(storage_ix & 7) @@ -1559,8 +1053,6 @@ func encoderCompressStreamFast(s *Writer, op int, available_in *uint, next_in *[ break } - tmp_command_buf = nil - tmp_literal_buf = nil checkFlushComplete(s) return true } @@ -1586,12 +1078,8 @@ func processMetadata(s *Writer, available_in *uint, next_in *[]byte) bool { continue } - if s.available_out_ != 0 { - break - } - if s.input_pos_ != s.last_flush_pos_ { - var result bool = encodeData(s, false, true, &s.available_out_, &s.next_out_) + var result bool = encodeData(s, false, true) if !result { return false } @@ -1599,8 +1087,8 @@ func processMetadata(s *Writer, available_in *uint, next_in *[]byte) bool { } if s.stream_state_ == streamMetadataHead { - s.next_out_ = s.tiny_buf_.u8[:] - s.available_out_ = writeMetadataHeader(s, uint(s.remaining_metadata_bytes_), s.next_out_) + n := writeMetadataHeader(s, uint(s.remaining_metadata_bytes_), s.tiny_buf_.u8[:]) + s.writeOutput(s.tiny_buf_.u8[:n]) s.stream_state_ = streamMetadataBody continue } else { @@ -1614,12 +1102,11 @@ func processMetadata(s *Writer, available_in *uint, next_in *[]byte) bool { /* This guarantees progress in "TakeOutput" workflow. */ var c uint32 = brotli_min_uint32_t(s.remaining_metadata_bytes_, 16) - s.next_out_ = s.tiny_buf_.u8[:] - copy(s.next_out_, (*next_in)[:c]) + copy(s.tiny_buf_.u8[:], (*next_in)[:c]) *next_in = (*next_in)[c:] *available_in -= uint(c) s.remaining_metadata_bytes_ -= c - s.available_out_ = uint(c) + s.writeOutput(s.tiny_buf_.u8[:c]) continue } @@ -1692,15 +1179,15 @@ func encoderCompressStream(s *Writer, op int, available_in *uint, next_in *[]byt continue } - /* Compress data only when internal output buffer is empty, stream is not + /* Compress data only when stream is not finished and there is no pending flush request. */ - if s.available_out_ == 0 && s.stream_state_ == streamProcessing { + if s.stream_state_ == streamProcessing { if remaining_block_size == 0 || op != int(operationProcess) { var is_last bool = ((*available_in == 0) && op == int(operationFinish)) var force_flush bool = ((*available_in == 0) && op == int(operationFlush)) var result bool updateSizeHint(s, *available_in) - result = encodeData(s, is_last, force_flush, &s.available_out_, &s.next_out_) + result = encodeData(s, is_last, force_flush) if !result { return false } @@ -1721,18 +1208,13 @@ func encoderCompressStream(s *Writer, op int, available_in *uint, next_in *[]byt return true } -func encoderHasMoreOutput(s *Writer) bool { - return s.available_out_ != 0 -} - -func encoderTakeOutput(s *Writer) []byte { - if s.available_out_ == 0 { - return nil +func (w *Writer) writeOutput(data []byte) { + if w.err != nil { + return } - result := s.next_out_[:s.available_out_] - s.total_out_ += s.available_out_ - s.available_out_ = 0 - checkFlushComplete(s) - return result + _, w.err = w.dst.Write(data) + if w.err == nil { + checkFlushComplete(w) + } } diff --git a/vendor/github.com/andybalholm/brotli/entropy_encode.go b/vendor/github.com/andybalholm/brotli/entropy_encode.go index d0c1dca..3f469a3 100644 --- a/vendor/github.com/andybalholm/brotli/entropy_encode.go +++ b/vendor/github.com/andybalholm/brotli/entropy_encode.go @@ -24,7 +24,7 @@ func initHuffmanTree(self *huffmanTree, count uint32, left int16, right int16) { } /* Input size optimized Shell sort. */ -type huffmanTreeComparator func(*huffmanTree, *huffmanTree) bool +type huffmanTreeComparator func(huffmanTree, huffmanTree) bool var sortHuffmanTreeItems_gaps = []uint{132, 57, 23, 10, 4, 1} @@ -36,14 +36,13 @@ func sortHuffmanTreeItems(items []huffmanTree, n uint, comparator huffmanTreeCom var tmp huffmanTree = items[i] var k uint = i var j uint = i - 1 - for comparator(&tmp, &items[j]) { + for comparator(tmp, items[j]) { items[k] = items[j] k = j - tmp10 := j - j-- - if tmp10 == 0 { + if j == 0 { break } + j-- } items[k] = tmp @@ -63,7 +62,7 @@ func sortHuffmanTreeItems(items []huffmanTree, n uint, comparator huffmanTreeCom for i = gap; i < n; i++ { var j uint = i var tmp huffmanTree = items[i] - for ; j >= gap && comparator(&tmp, &items[j-gap]); j -= gap { + for ; j >= gap && comparator(tmp, items[j-gap]); j -= gap { items[j] = items[j-gap] } @@ -105,7 +104,7 @@ func setDepth(p0 int, pool []huffmanTree, depth []byte, max_depth int) bool { } /* Sort the root nodes, least popular first. */ -func sortHuffmanTree(v0 *huffmanTree, v1 *huffmanTree) bool { +func sortHuffmanTree(v0 huffmanTree, v1 huffmanTree) bool { if v0.total_count_ != v1.total_count_ { return v0.total_count_ < v1.total_count_ } diff --git a/vendor/github.com/andybalholm/brotli/fast_log.go b/vendor/github.com/andybalholm/brotli/fast_log.go index bbae300..9d6607f 100644 --- a/vendor/github.com/andybalholm/brotli/fast_log.go +++ b/vendor/github.com/andybalholm/brotli/fast_log.go @@ -1,6 +1,9 @@ package brotli -import "math" +import ( + "math" + "math/bits" +) /* Copyright 2013 Google Inc. All Rights Reserved. @@ -11,16 +14,7 @@ import "math" /* Utilities for fast computation of logarithms. */ func log2FloorNonZero(n uint) uint32 { - /* TODO: generalize and move to platform.h */ - var result uint32 = 0 - for { - n >>= 1 - if n == 0 { - break - } - result++ - } - return result + return uint32(bits.Len(n)) - 1 } /* A lookup table for small values of log2(int) to be used in entropy diff --git a/vendor/github.com/andybalholm/brotli/find_match_length.go b/vendor/github.com/andybalholm/brotli/find_match_length.go index 14d350a..09d2ae6 100644 --- a/vendor/github.com/andybalholm/brotli/find_match_length.go +++ b/vendor/github.com/andybalholm/brotli/find_match_length.go @@ -1,5 +1,11 @@ package brotli +import ( + "encoding/binary" + "math/bits" + "runtime" +) + /* Copyright 2010 Google Inc. All Rights Reserved. Distributed under MIT license. @@ -9,6 +15,29 @@ package brotli /* Function to find maximal matching prefixes of strings. */ func findMatchLengthWithLimit(s1 []byte, s2 []byte, limit uint) uint { var matched uint = 0 + _, _ = s1[limit-1], s2[limit-1] // bounds check + switch runtime.GOARCH { + case "amd64": + // Compare 8 bytes at at time. + for matched+8 <= limit { + w1 := binary.LittleEndian.Uint64(s1[matched:]) + w2 := binary.LittleEndian.Uint64(s2[matched:]) + if w1 != w2 { + return matched + uint(bits.TrailingZeros64(w1^w2)>>3) + } + matched += 8 + } + case "386": + // Compare 4 bytes at at time. + for matched+4 <= limit { + w1 := binary.LittleEndian.Uint32(s1[matched:]) + w2 := binary.LittleEndian.Uint32(s2[matched:]) + if w1 != w2 { + return matched + uint(bits.TrailingZeros32(w1^w2)>>3) + } + matched += 4 + } + } for matched < limit && s1[matched] == s2[matched] { matched++ } diff --git a/vendor/github.com/andybalholm/brotli/hash.go b/vendor/github.com/andybalholm/brotli/hash.go index 003b433..00f812e 100644 --- a/vendor/github.com/andybalholm/brotli/hash.go +++ b/vendor/github.com/andybalholm/brotli/hash.go @@ -29,8 +29,6 @@ type hasherHandle interface { Store(data []byte, mask uint, ix uint) } -type score_t uint - const kCutoffTransformsCount uint32 = 10 /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */ diff --git a/vendor/github.com/andybalholm/brotli/hash_forgetful_chain.go b/vendor/github.com/andybalholm/brotli/hash_forgetful_chain.go index 3364c44..306e46d 100644 --- a/vendor/github.com/andybalholm/brotli/hash_forgetful_chain.go +++ b/vendor/github.com/andybalholm/brotli/hash_forgetful_chain.go @@ -110,8 +110,7 @@ func (h *hashForgetfulChain) Prepare(one_shot bool, input_size uint, data []byte func (h *hashForgetfulChain) Store(data []byte, mask uint, ix uint) { var key uint = h.HashBytes(data[ix&mask:]) var bank uint = key & (h.numBanks - 1) - var idx uint - idx = uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1) + idx := uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1) h.free_slot_idx[bank]++ var delta uint = ix - uint(h.addr[key]) h.tiny_hash[uint16(ix)] = byte(key) diff --git a/vendor/github.com/andybalholm/brotli/hash_rolling.go b/vendor/github.com/andybalholm/brotli/hash_rolling.go index ad655a0..6630fc0 100644 --- a/vendor/github.com/andybalholm/brotli/hash_rolling.go +++ b/vendor/github.com/andybalholm/brotli/hash_rolling.go @@ -48,7 +48,6 @@ type hashRolling struct { state uint32 table []uint32 next_ix uint - chunk_len uint32 factor uint32 factor_remove uint32 } diff --git a/vendor/github.com/andybalholm/brotli/histogram.go b/vendor/github.com/andybalholm/brotli/histogram.go index f208ff7..0346622 100644 --- a/vendor/github.com/andybalholm/brotli/histogram.go +++ b/vendor/github.com/andybalholm/brotli/histogram.go @@ -163,7 +163,7 @@ func initBlockSplitIterator(self *blockSplitIterator, split *blockSplit) { self.split_ = split self.idx_ = 0 self.type_ = 0 - if split.lengths != nil { + if len(split.lengths) > 0 { self.length_ = uint(split.lengths[0]) } else { self.length_ = 0 @@ -180,17 +180,16 @@ func blockSplitIteratorNext(self *blockSplitIterator) { self.length_-- } -func buildHistogramsWithContext(cmds []command, num_commands uint, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit, ringbuffer []byte, start_pos uint, mask uint, prev_byte byte, prev_byte2 byte, context_modes []int, literal_histograms []histogramLiteral, insert_and_copy_histograms []histogramCommand, copy_dist_histograms []histogramDistance) { +func buildHistogramsWithContext(cmds []command, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit, ringbuffer []byte, start_pos uint, mask uint, prev_byte byte, prev_byte2 byte, context_modes []int, literal_histograms []histogramLiteral, insert_and_copy_histograms []histogramCommand, copy_dist_histograms []histogramDistance) { var pos uint = start_pos var literal_it blockSplitIterator var insert_and_copy_it blockSplitIterator var dist_it blockSplitIterator - var i uint initBlockSplitIterator(&literal_it, literal_split) initBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split) initBlockSplitIterator(&dist_it, dist_split) - for i = 0; i < num_commands; i++ { + for i := range cmds { var cmd *command = &cmds[i] var j uint blockSplitIteratorNext(&insert_and_copy_it) diff --git a/vendor/github.com/andybalholm/brotli/http.go b/vendor/github.com/andybalholm/brotli/http.go index af58670..1e98196 100644 --- a/vendor/github.com/andybalholm/brotli/http.go +++ b/vendor/github.com/andybalholm/brotli/http.go @@ -180,8 +180,8 @@ func init() { var t octetType isCtl := c <= 31 || c == 127 isChar := 0 <= c && c <= 127 - isSeparator := strings.IndexRune(" \t\"(),/:;<=>?@[]\\{}", rune(c)) >= 0 - if strings.IndexRune(" \t\r\n", rune(c)) >= 0 { + isSeparator := strings.ContainsRune(" \t\"(),/:;<=>?@[]\\{}", rune(c)) + if strings.ContainsRune(" \t\r\n", rune(c)) { t |= isSpace } if isChar && !isCtl && !isSeparator { diff --git a/vendor/github.com/andybalholm/brotli/memory.go b/vendor/github.com/andybalholm/brotli/memory.go index 7208a3b..a07c705 100644 --- a/vendor/github.com/andybalholm/brotli/memory.go +++ b/vendor/github.com/andybalholm/brotli/memory.go @@ -23,12 +23,18 @@ func brotli_ensure_capacity_uint8_t(a *[]byte, c *uint, r uint) { for new_size < r { new_size *= 2 } - var new_array []byte = make([]byte, new_size) - if *c != 0 { - copy(new_array, (*a)[:*c]) + + if cap(*a) < int(new_size) { + var new_array []byte = make([]byte, new_size) + if *c != 0 { + copy(new_array, (*a)[:*c]) + } + + *a = new_array + } else { + *a = (*a)[:new_size] } - *a = new_array *c = new_size } } @@ -45,12 +51,16 @@ func brotli_ensure_capacity_uint32_t(a *[]uint32, c *uint, r uint) { new_size *= 2 } - new_array = make([]uint32, new_size) - if *c != 0 { - copy(new_array, (*a)[:*c]) - } + if cap(*a) < int(new_size) { + new_array = make([]uint32, new_size) + if *c != 0 { + copy(new_array, (*a)[:*c]) + } - *a = new_array + *a = new_array + } else { + *a = (*a)[:new_size] + } *c = new_size } } diff --git a/vendor/github.com/andybalholm/brotli/metablock.go b/vendor/github.com/andybalholm/brotli/metablock.go index 4a412cf..3014df8 100644 --- a/vendor/github.com/andybalholm/brotli/metablock.go +++ b/vendor/github.com/andybalholm/brotli/metablock.go @@ -1,5 +1,9 @@ package brotli +import ( + "sync" +) + /* Copyright 2014 Google Inc. All Rights Reserved. Distributed under MIT license. @@ -25,31 +29,30 @@ type metaBlockSplit struct { distance_histograms_size uint } -func initMetaBlockSplit(mb *metaBlockSplit) { - initBlockSplit(&mb.literal_split) - initBlockSplit(&mb.command_split) - initBlockSplit(&mb.distance_split) - mb.literal_context_map = nil - mb.literal_context_map_size = 0 - mb.distance_context_map = nil - mb.distance_context_map_size = 0 - mb.literal_histograms = nil - mb.literal_histograms_size = 0 - mb.command_histograms = nil - mb.command_histograms_size = 0 - mb.distance_histograms = nil - mb.distance_histograms_size = 0 +var metaBlockPool sync.Pool + +func getMetaBlockSplit() *metaBlockSplit { + mb, _ := metaBlockPool.Get().(*metaBlockSplit) + + if mb == nil { + mb = &metaBlockSplit{} + } else { + initBlockSplit(&mb.literal_split) + initBlockSplit(&mb.command_split) + initBlockSplit(&mb.distance_split) + mb.literal_context_map = mb.literal_context_map[:0] + mb.literal_context_map_size = 0 + mb.distance_context_map = mb.distance_context_map[:0] + mb.distance_context_map_size = 0 + mb.literal_histograms = mb.literal_histograms[:0] + mb.command_histograms = mb.command_histograms[:0] + mb.distance_histograms = mb.distance_histograms[:0] + } + return mb } -func destroyMetaBlockSplit(mb *metaBlockSplit) { - destroyBlockSplit(&mb.literal_split) - destroyBlockSplit(&mb.command_split) - destroyBlockSplit(&mb.distance_split) - mb.literal_context_map = nil - mb.distance_context_map = nil - mb.literal_histograms = nil - mb.command_histograms = nil - mb.distance_histograms = nil +func freeMetaBlockSplit(mb *metaBlockSplit) { + metaBlockPool.Put(mb) } func initDistanceParams(params *encoderParams, npostfix uint32, ndirect uint32) { @@ -84,14 +87,12 @@ func initDistanceParams(params *encoderParams, npostfix uint32, ndirect uint32) dist_params.max_distance = uint(max_distance) } -func recomputeDistancePrefixes(cmds []command, num_commands uint, orig_params *distanceParams, new_params *distanceParams) { - var i uint - +func recomputeDistancePrefixes(cmds []command, orig_params *distanceParams, new_params *distanceParams) { if orig_params.distance_postfix_bits == new_params.distance_postfix_bits && orig_params.num_direct_distance_codes == new_params.num_direct_distance_codes { return } - for i = 0; i < num_commands; i++ { + for i := range cmds { var cmd *command = &cmds[i] if commandCopyLen(cmd) != 0 && cmd.cmd_prefix_ >= 128 { prefixEncodeCopyDistance(uint(commandRestoreDistanceCode(cmd, orig_params)), uint(new_params.num_direct_distance_codes), uint(new_params.distance_postfix_bits), &cmd.dist_prefix_, &cmd.dist_extra_) @@ -99,8 +100,7 @@ func recomputeDistancePrefixes(cmds []command, num_commands uint, orig_params *d } } -func computeDistanceCost(cmds []command, num_commands uint, orig_params *distanceParams, new_params *distanceParams, cost *float64) bool { - var i uint +func computeDistanceCost(cmds []command, orig_params *distanceParams, new_params *distanceParams, cost *float64) bool { var equal_params bool = false var dist_prefix uint16 var dist_extra uint32 @@ -112,8 +112,8 @@ func computeDistanceCost(cmds []command, num_commands uint, orig_params *distanc equal_params = true } - for i = 0; i < num_commands; i++ { - var cmd *command = &cmds[i] + for i := range cmds { + cmd := &cmds[i] if commandCopyLen(cmd) != 0 && cmd.cmd_prefix_ >= 128 { if equal_params { dist_prefix = cmd.dist_prefix_ @@ -137,7 +137,7 @@ func computeDistanceCost(cmds []command, num_commands uint, orig_params *distanc var buildMetaBlock_kMaxNumberOfHistograms uint = 256 -func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParams, prev_byte byte, prev_byte2 byte, cmds []command, num_commands uint, literal_context_mode int, mb *metaBlockSplit) { +func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParams, prev_byte byte, prev_byte2 byte, cmds []command, literal_context_mode int, mb *metaBlockSplit) { var distance_histograms []histogramDistance var literal_histograms []histogramLiteral var literal_context_modes []int = nil @@ -164,7 +164,7 @@ func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParam check_orig = false } - skip = !computeDistanceCost(cmds, num_commands, &orig_params.dist, &new_params.dist, &dist_cost) + skip = !computeDistanceCost(cmds, &orig_params.dist, &new_params.dist, &dist_cost) if skip || (dist_cost > best_dist_cost) { break } @@ -181,7 +181,7 @@ func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParam if check_orig { var dist_cost float64 - computeDistanceCost(cmds, num_commands, &orig_params.dist, &orig_params.dist, &dist_cost) + computeDistanceCost(cmds, &orig_params.dist, &orig_params.dist, &dist_cost) if dist_cost < best_dist_cost { /* NB: currently unused; uncomment when more param tuning is added. */ /* best_dist_cost = dist_cost; */ @@ -189,9 +189,9 @@ func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParam } } - recomputeDistancePrefixes(cmds, num_commands, &orig_params.dist, ¶ms.dist) + recomputeDistancePrefixes(cmds, &orig_params.dist, ¶ms.dist) - splitBlock(cmds, num_commands, ringbuffer, pos, mask, params, &mb.literal_split, &mb.command_split, &mb.distance_split) + splitBlock(cmds, ringbuffer, pos, mask, params, &mb.literal_split, &mb.command_split, &mb.distance_split) if !params.disable_literal_context_modeling { literal_context_multiplier = 1 << literalContextBits @@ -209,21 +209,30 @@ func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParam distance_histograms = make([]histogramDistance, distance_histograms_size) clearHistogramsDistance(distance_histograms, distance_histograms_size) - assert(mb.command_histograms == nil) mb.command_histograms_size = mb.command_split.num_types - mb.command_histograms = make([]histogramCommand, (mb.command_histograms_size)) + if cap(mb.command_histograms) < int(mb.command_histograms_size) { + mb.command_histograms = make([]histogramCommand, (mb.command_histograms_size)) + } else { + mb.command_histograms = mb.command_histograms[:mb.command_histograms_size] + } clearHistogramsCommand(mb.command_histograms, mb.command_histograms_size) - buildHistogramsWithContext(cmds, num_commands, &mb.literal_split, &mb.command_split, &mb.distance_split, ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes, literal_histograms, mb.command_histograms, distance_histograms) + buildHistogramsWithContext(cmds, &mb.literal_split, &mb.command_split, &mb.distance_split, ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes, literal_histograms, mb.command_histograms, distance_histograms) literal_context_modes = nil - assert(mb.literal_context_map == nil) mb.literal_context_map_size = mb.literal_split.num_types << literalContextBits - mb.literal_context_map = make([]uint32, (mb.literal_context_map_size)) + if cap(mb.literal_context_map) < int(mb.literal_context_map_size) { + mb.literal_context_map = make([]uint32, (mb.literal_context_map_size)) + } else { + mb.literal_context_map = mb.literal_context_map[:mb.literal_context_map_size] + } - assert(mb.literal_histograms == nil) mb.literal_histograms_size = mb.literal_context_map_size - mb.literal_histograms = make([]histogramLiteral, (mb.literal_histograms_size)) + if cap(mb.literal_histograms) < int(mb.literal_histograms_size) { + mb.literal_histograms = make([]histogramLiteral, (mb.literal_histograms_size)) + } else { + mb.literal_histograms = mb.literal_histograms[:mb.literal_histograms_size] + } clusterHistogramsLiteral(literal_histograms, literal_histograms_size, buildMetaBlock_kMaxNumberOfHistograms, mb.literal_histograms, &mb.literal_histograms_size, mb.literal_context_map) literal_histograms = nil @@ -239,13 +248,19 @@ func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParam } } - assert(mb.distance_context_map == nil) mb.distance_context_map_size = mb.distance_split.num_types << distanceContextBits - mb.distance_context_map = make([]uint32, (mb.distance_context_map_size)) + if cap(mb.distance_context_map) < int(mb.distance_context_map_size) { + mb.distance_context_map = make([]uint32, (mb.distance_context_map_size)) + } else { + mb.distance_context_map = mb.distance_context_map[:mb.distance_context_map_size] + } - assert(mb.distance_histograms == nil) mb.distance_histograms_size = mb.distance_context_map_size - mb.distance_histograms = make([]histogramDistance, (mb.distance_histograms_size)) + if cap(mb.distance_histograms) < int(mb.distance_histograms_size) { + mb.distance_histograms = make([]histogramDistance, (mb.distance_histograms_size)) + } else { + mb.distance_histograms = mb.distance_histograms[:mb.distance_histograms_size] + } clusterHistogramsDistance(distance_histograms, mb.distance_context_map_size, buildMetaBlock_kMaxNumberOfHistograms, mb.distance_histograms, &mb.distance_histograms_size, mb.distance_context_map) distance_histograms = nil @@ -298,9 +313,12 @@ func initContextBlockSplitter(self *contextBlockSplitter, alphabet_size uint, nu brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks) brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks) split.num_blocks = max_num_blocks - assert(*histograms == nil) *histograms_size = max_num_types * num_contexts - *histograms = make([]histogramLiteral, (*histograms_size)) + if histograms == nil || cap(*histograms) < int(*histograms_size) { + *histograms = make([]histogramLiteral, (*histograms_size)) + } else { + *histograms = (*histograms)[:*histograms_size] + } self.histograms_ = *histograms /* Clear only current histogram. */ @@ -453,9 +471,12 @@ func contextBlockSplitterAddSymbol(self *contextBlockSplitter, symbol uint, cont func mapStaticContexts(num_contexts uint, static_context_map []uint32, mb *metaBlockSplit) { var i uint - assert(mb.literal_context_map == nil) mb.literal_context_map_size = mb.literal_split.num_types << literalContextBits - mb.literal_context_map = make([]uint32, (mb.literal_context_map_size)) + if cap(mb.literal_context_map) < int(mb.literal_context_map_size) { + mb.literal_context_map = make([]uint32, (mb.literal_context_map_size)) + } else { + mb.literal_context_map = mb.literal_context_map[:mb.literal_context_map_size] + } for i = 0; i < mb.literal_split.num_types; i++ { var offset uint32 = uint32(i * num_contexts) @@ -466,7 +487,7 @@ func mapStaticContexts(num_contexts uint, static_context_map []uint32, mb *metaB } } -func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, n_commands uint, mb *metaBlockSplit) { +func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) { var lit_blocks struct { plain blockSplitterLiteral ctx contextBlockSplitter @@ -474,8 +495,7 @@ func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_b var cmd_blocks blockSplitterCommand var dist_blocks blockSplitterDistance var num_literals uint = 0 - var i uint - for i = 0; i < n_commands; i++ { + for i := range commands { num_literals += uint(commands[i].insert_len_) } @@ -485,11 +505,10 @@ func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_b initContextBlockSplitter(&lit_blocks.ctx, 256, num_contexts, 512, 400.0, num_literals, &mb.literal_split, &mb.literal_histograms, &mb.literal_histograms_size) } - initBlockSplitterCommand(&cmd_blocks, numCommandSymbols, 1024, 500.0, n_commands, &mb.command_split, &mb.command_histograms, &mb.command_histograms_size) - initBlockSplitterDistance(&dist_blocks, 64, 512, 100.0, n_commands, &mb.distance_split, &mb.distance_histograms, &mb.distance_histograms_size) + initBlockSplitterCommand(&cmd_blocks, numCommandSymbols, 1024, 500.0, uint(len(commands)), &mb.command_split, &mb.command_histograms, &mb.command_histograms_size) + initBlockSplitterDistance(&dist_blocks, 64, 512, 100.0, uint(len(commands)), &mb.distance_split, &mb.distance_histograms, &mb.distance_histograms_size) - for i = 0; i < n_commands; i++ { - var cmd command = commands[i] + for _, cmd := range commands { var j uint blockSplitterAddSymbolCommand(&cmd_blocks, uint(cmd.cmd_prefix_)) for j = uint(cmd.insert_len_); j != 0; j-- { @@ -530,11 +549,11 @@ func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_b } } -func buildMetaBlockGreedy(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, n_commands uint, mb *metaBlockSplit) { +func buildMetaBlockGreedy(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) { if num_contexts == 1 { - buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, 1, nil, commands, n_commands, mb) + buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, 1, nil, commands, mb) } else { - buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, num_contexts, static_context_map, commands, n_commands, mb) + buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, num_contexts, static_context_map, commands, mb) } } diff --git a/vendor/github.com/andybalholm/brotli/metablock_command.go b/vendor/github.com/andybalholm/brotli/metablock_command.go index d47541c..14c7b77 100644 --- a/vendor/github.com/andybalholm/brotli/metablock_command.go +++ b/vendor/github.com/andybalholm/brotli/metablock_command.go @@ -43,9 +43,12 @@ func initBlockSplitterCommand(self *blockSplitterCommand, alphabet_size uint, mi brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks) brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks) self.split_.num_blocks = max_num_blocks - assert(*histograms == nil) *histograms_size = max_num_types - *histograms = make([]histogramCommand, (*histograms_size)) + if histograms == nil || cap(*histograms) < int(*histograms_size) { + *histograms = make([]histogramCommand, (*histograms_size)) + } else { + *histograms = (*histograms)[:*histograms_size] + } self.histograms_ = *histograms /* Clear only current histogram. */ diff --git a/vendor/github.com/andybalholm/brotli/metablock_distance.go b/vendor/github.com/andybalholm/brotli/metablock_distance.go index 9592312..5110a81 100644 --- a/vendor/github.com/andybalholm/brotli/metablock_distance.go +++ b/vendor/github.com/andybalholm/brotli/metablock_distance.go @@ -43,9 +43,12 @@ func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks) brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks) self.split_.num_blocks = max_num_blocks - assert(*histograms == nil) *histograms_size = max_num_types - *histograms = make([]histogramDistance, (*histograms_size)) + if histograms == nil || cap(*histograms) < int(*histograms_size) { + *histograms = make([]histogramDistance, *histograms_size) + } else { + *histograms = (*histograms)[:*histograms_size] + } self.histograms_ = *histograms /* Clear only current histogram. */ diff --git a/vendor/github.com/andybalholm/brotli/metablock_literal.go b/vendor/github.com/andybalholm/brotli/metablock_literal.go index d7e8a7c..307f8da 100644 --- a/vendor/github.com/andybalholm/brotli/metablock_literal.go +++ b/vendor/github.com/andybalholm/brotli/metablock_literal.go @@ -43,9 +43,12 @@ func initBlockSplitterLiteral(self *blockSplitterLiteral, alphabet_size uint, mi brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks) brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks) self.split_.num_blocks = max_num_blocks - assert(*histograms == nil) *histograms_size = max_num_types - *histograms = make([]histogramLiteral, (*histograms_size)) + if histograms == nil || cap(*histograms) < int(*histograms_size) { + *histograms = make([]histogramLiteral, *histograms_size) + } else { + *histograms = (*histograms)[:*histograms_size] + } self.histograms_ = *histograms /* Clear only current histogram. */ diff --git a/vendor/github.com/andybalholm/brotli/reader.go b/vendor/github.com/andybalholm/brotli/reader.go index 5c795e6..cdc6764 100644 --- a/vendor/github.com/andybalholm/brotli/reader.go +++ b/vendor/github.com/andybalholm/brotli/reader.go @@ -33,7 +33,9 @@ func NewReader(src io.Reader) *Reader { func (r *Reader) Reset(src io.Reader) error { decoderStateInit(r) r.src = src - r.buf = make([]byte, readBufSize) + if r.buf == nil { + r.buf = make([]byte, readBufSize) + } return nil } diff --git a/vendor/github.com/andybalholm/brotli/ringbuffer.go b/vendor/github.com/andybalholm/brotli/ringbuffer.go index 693a3f6..1c8f86f 100644 --- a/vendor/github.com/andybalholm/brotli/ringbuffer.go +++ b/vendor/github.com/andybalholm/brotli/ringbuffer.go @@ -27,10 +27,7 @@ type ringBuffer struct { } func ringBufferInit(rb *ringBuffer) { - rb.cur_size_ = 0 rb.pos_ = 0 - rb.data_ = nil - rb.buffer_ = nil } func ringBufferSetup(params *encoderParams, rb *ringBuffer) { @@ -47,11 +44,16 @@ const kSlackForEightByteHashingEverywhere uint = 7 /* Allocates or re-allocates data_ to the given length + plus some slack region before and after. Fills the slack regions with zeros. */ func ringBufferInitBuffer(buflen uint32, rb *ringBuffer) { - var new_data []byte = make([]byte, (2 + uint(buflen) + kSlackForEightByteHashingEverywhere)) + var new_data []byte var i uint + size := 2 + int(buflen) + int(kSlackForEightByteHashingEverywhere) + if cap(rb.data_) < size { + new_data = make([]byte, size) + } else { + new_data = rb.data_[:size] + } if rb.data_ != nil { copy(new_data, rb.data_[:2+rb.cur_size_+uint32(kSlackForEightByteHashingEverywhere)]) - rb.data_ = nil } rb.data_ = new_data diff --git a/vendor/github.com/andybalholm/brotli/static_dict.go b/vendor/github.com/andybalholm/brotli/static_dict.go index 8e7492d..bc05566 100644 --- a/vendor/github.com/andybalholm/brotli/static_dict.go +++ b/vendor/github.com/andybalholm/brotli/static_dict.go @@ -77,8 +77,7 @@ func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_le var offset uint = uint(dict.buckets[hash(data)]) var end bool = offset == 0 for !end { - var w dictWord - w = dict.dict_words[offset] + w := dict.dict_words[offset] offset++ var l uint = uint(w.len) & 0x1F var n uint = uint(1) << dict.words.size_bits_by_length[l] @@ -431,8 +430,7 @@ func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_le var offset uint = uint(dict.buckets[hash(data[1:])]) var end bool = offset == 0 for !end { - var w dictWord - w = dict.dict_words[offset] + w := dict.dict_words[offset] offset++ var l uint = uint(w.len) & 0x1F var n uint = uint(1) << dict.words.size_bits_by_length[l] @@ -596,8 +594,7 @@ func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_le var offset uint = uint(dict.buckets[hash(data[2:])]) var end bool = offset == 0 for !end { - var w dictWord - w = dict.dict_words[offset] + w := dict.dict_words[offset] offset++ var l uint = uint(w.len) & 0x1F var n uint = uint(1) << dict.words.size_bits_by_length[l] @@ -629,8 +626,7 @@ func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_le var offset uint = uint(dict.buckets[hash(data[5:])]) var end bool = offset == 0 for !end { - var w dictWord - w = dict.dict_words[offset] + w := dict.dict_words[offset] offset++ var l uint = uint(w.len) & 0x1F var n uint = uint(1) << dict.words.size_bits_by_length[l] diff --git a/vendor/github.com/andybalholm/brotli/utf8_util.go b/vendor/github.com/andybalholm/brotli/utf8_util.go index f86de3d..3244247 100644 --- a/vendor/github.com/andybalholm/brotli/utf8_util.go +++ b/vendor/github.com/andybalholm/brotli/utf8_util.go @@ -58,8 +58,7 @@ func isMostlyUTF8(data []byte, pos uint, mask uint, length uint, min_fraction fl var i uint = 0 for i < length { var symbol int - var current_data []byte - current_data = data[(pos+i)&mask:] + current_data := data[(pos+i)&mask:] var bytes_read uint = parseAsUTF8(&symbol, current_data, length-i) i += bytes_read if symbol < 0x110000 { diff --git a/vendor/github.com/andybalholm/brotli/write_bits.go b/vendor/github.com/andybalholm/brotli/write_bits.go index 8f15c20..8729901 100644 --- a/vendor/github.com/andybalholm/brotli/write_bits.go +++ b/vendor/github.com/andybalholm/brotli/write_bits.go @@ -1,5 +1,7 @@ package brotli +import "encoding/binary" + /* Copyright 2010 Google Inc. All Rights Reserved. Distributed under MIT license. @@ -24,21 +26,15 @@ package brotli For n bits, we take the last 5 bits, OR that with high bits in BYTE-0, and locate the rest in BYTE+1, BYTE+2, etc. */ func writeBits(n_bits uint, bits uint64, pos *uint, array []byte) { - var array_pos []byte = array[*pos>>3:] - var bits_reserved_in_first_byte uint = (*pos & 7) - /* implicit & 0xFF is assumed for uint8_t arithmetics */ - - var bits_left_to_write uint - bits <<= bits_reserved_in_first_byte - array_pos[0] |= byte(bits) - array_pos = array_pos[1:] - for bits_left_to_write = n_bits + bits_reserved_in_first_byte; bits_left_to_write >= 9; bits_left_to_write -= 8 { - bits >>= 8 - array_pos[0] = byte(bits) - array_pos = array_pos[1:] - } - - array_pos[0] = 0 + /* This branch of the code can write up to 56 bits at a time, + 7 bits are lost by being perhaps already in *p and at least + 1 bit is needed to initialize the bit-stream ahead (i.e. if 7 + bits are in *p and we write 57 bits, then the next write will + access a byte that was never initialized). */ + p := array[*pos>>3:] + v := uint64(p[0]) + v |= bits << (*pos & 7) + binary.LittleEndian.PutUint64(p, v) *pos += n_bits } diff --git a/vendor/github.com/andybalholm/brotli/writer.go b/vendor/github.com/andybalholm/brotli/writer.go index ec333f9..39feaef 100644 --- a/vendor/github.com/andybalholm/brotli/writer.go +++ b/vendor/github.com/andybalholm/brotli/writer.go @@ -61,12 +61,16 @@ func (w *Writer) Reset(dst io.Writer) { w.params.lgwin = uint(w.options.LGWin) } w.dst = dst + w.err = nil } func (w *Writer) writeChunk(p []byte, op int) (n int, err error) { if w.dst == nil { return 0, errWriterClosed } + if w.err != nil { + return 0, w.err + } for { availableIn := uint(len(p)) @@ -79,16 +83,8 @@ func (w *Writer) writeChunk(p []byte, op int) (n int, err error) { return n, errEncode } - outputData := encoderTakeOutput(w) - - if len(outputData) > 0 { - _, err = w.dst.Write(outputData) - if err != nil { - return n, err - } - } - if len(p) == 0 { - return n, nil + if len(p) == 0 || w.err != nil { + return n, w.err } } } diff --git a/vendor/github.com/bitrise-io/go-utils/command/command.go b/vendor/github.com/bitrise-io/go-utils/command/command.go index 4cd005a..c068490 100644 --- a/vendor/github.com/bitrise-io/go-utils/command/command.go +++ b/vendor/github.com/bitrise-io/go-utils/command/command.go @@ -2,14 +2,11 @@ package command import ( "errors" - "fmt" "io" "os" "os/exec" "strconv" "strings" - - "github.com/bitrise-io/go-utils/errorutil" ) // ---------- @@ -139,18 +136,10 @@ func PrintableCommandArgs(isQuoteFirst bool, fullCommandArgs []string) string { } // RunCmdAndReturnExitCode ... -func RunCmdAndReturnExitCode(cmd *exec.Cmd) (int, error) { - err := cmd.Run() - if err != nil { - exitCode, castErr := errorutil.CmdExitCodeFromError(err) - if castErr != nil { - return 1, fmt.Errorf("failed get exit code from error: %s, error: %s", err, castErr) - } - - return exitCode, err - } - - return 0, nil +func RunCmdAndReturnExitCode(cmd *exec.Cmd) (exitCode int, err error) { + err = cmd.Run() + exitCode = cmd.ProcessState.ExitCode() + return } // RunCmdAndReturnTrimmedOutput ... diff --git a/vendor/github.com/bitrise-io/go-utils/command/git/commands.go b/vendor/github.com/bitrise-io/go-utils/command/git/commands.go index c1e276b..1c217d8 100644 --- a/vendor/github.com/bitrise-io/go-utils/command/git/commands.go +++ b/vendor/github.com/bitrise-io/go-utils/command/git/commands.go @@ -62,8 +62,10 @@ func (g *Git) Clean(options ...string) *command.Model { } // SubmoduleUpdate updates the registered submodules. -func (g *Git) SubmoduleUpdate() *command.Model { - return g.command("submodule", "update", "--init", "--recursive") +func (g *Git) SubmoduleUpdate(opts ...string) *command.Model { + args := []string{"submodule", "update", "--init", "--recursive"} + args = append(args, opts...) + return g.command(args...) } // SubmoduleForeach evaluates an arbitrary git command in each checked out @@ -86,8 +88,9 @@ func (g *Git) Add(pathspec string) *command.Model { } // Branch lists branches. -func (g *Git) Branch() *command.Model { - return g.command("branch") +func (g *Git) Branch(opts ...string) *command.Model { + args := append([]string{"branch"}, opts...) + return g.command(args...) } // NewBranch creates a new branch as if git-branch were called and then check it out. @@ -101,8 +104,11 @@ func (g *Git) Apply(patch string) *command.Model { } // Log shows the commit logs. The format parameter controls what is shown and how. -func (g *Git) Log(format string) *command.Model { - return g.command("log", "-1", "--format="+format) +// Revision range can be optionally specified using the opts parameter. +func (g *Git) Log(format string, opts ...string) *command.Model { + args := []string{"log", "-1", "--format=" + format} + args = append(args, opts...) + return g.command(args...) } // RevList lists commit objects in reverse chronological order. @@ -135,6 +141,24 @@ func (g *Git) Status(opts ...string) *command.Model { } // Config sets a git config setting for the repository. -func (g *Git) Config(key string, value string) *command.Model { - return g.command("config", key, value) +func (g *Git) Config(key string, value string, opts ...string) *command.Model { + args := []string{"config", key, value} + args = append(args, opts...) + return g.command(args...) +} + +// SparseCheckoutInit initializes the sparse-checkout config file. +func (g *Git) SparseCheckoutInit(cone bool) *command.Model { + args := []string{"sparse-checkout", "init"} + if cone { + args = append(args, "--cone") + } + return g.command(args...) +} + +// SparseCheckoutSet writes the provided patterns to the sparse-checkout config file. +func (g *Git) SparseCheckoutSet(opts ...string) *command.Model { + args := []string{"sparse-checkout", "set"} + args = append(args, opts...) + return g.command(args...) } diff --git a/vendor/github.com/bitrise-io/go-utils/errorutil/errorutil.go b/vendor/github.com/bitrise-io/go-utils/errorutil/errorutil.go deleted file mode 100644 index 1128416..0000000 --- a/vendor/github.com/bitrise-io/go-utils/errorutil/errorutil.go +++ /dev/null @@ -1,36 +0,0 @@ -package errorutil - -import ( - "errors" - "os/exec" - "regexp" - "syscall" -) - -// IsExitStatusError ... -func IsExitStatusError(err error) bool { - return IsExitStatusErrorStr(err.Error()) -} - -// IsExitStatusErrorStr ... -func IsExitStatusErrorStr(errString string) bool { - // example exit status error string: exit status 1 - var rex = regexp.MustCompile(`^exit status [0-9]{1,3}$`) - return rex.MatchString(errString) -} - -// CmdExitCodeFromError ... -func CmdExitCodeFromError(err error) (int, error) { - cmdExitCode := 0 - if err != nil { - if exitError, ok := err.(*exec.ExitError); ok { - waitStatus, ok := exitError.Sys().(syscall.WaitStatus) - if !ok { - return 1, errors.New("Failed to cast exit status") - } - cmdExitCode = waitStatus.ExitStatus() - } - return cmdExitCode, nil - } - return 0, nil -} diff --git a/vendor/github.com/bitrise-io/go-utils/pathutil/path_filter.go b/vendor/github.com/bitrise-io/go-utils/pathutil/path_filter.go new file mode 100644 index 0000000..db0665d --- /dev/null +++ b/vendor/github.com/bitrise-io/go-utils/pathutil/path_filter.go @@ -0,0 +1,164 @@ +package pathutil + +import ( + "errors" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strings" +) + +// ListEntries filters contents of a directory using the provided filters +func ListEntries(dir string, filters ...FilterFunc) ([]string, error) { + absDir, err := filepath.Abs(dir) + if err != nil { + return []string{}, err + } + + entries, err := ioutil.ReadDir(absDir) + if err != nil { + return []string{}, err + } + + var paths []string + for _, entry := range entries { + pth := filepath.Join(absDir, entry.Name()) + paths = append(paths, pth) + } + + return FilterPaths(paths, filters...) +} + +// FilterPaths ... +func FilterPaths(fileList []string, filters ...FilterFunc) ([]string, error) { + var filtered []string + + for _, pth := range fileList { + allowed := true + for _, filter := range filters { + if allows, err := filter(pth); err != nil { + return []string{}, err + } else if !allows { + allowed = false + break + } + } + if allowed { + filtered = append(filtered, pth) + } + } + + return filtered, nil +} + +// FilterFunc ... +type FilterFunc func(string) (bool, error) + +// BaseFilter ... +func BaseFilter(base string, allowed bool) FilterFunc { + return func(pth string) (bool, error) { + b := filepath.Base(pth) + return allowed == strings.EqualFold(base, b), nil + } +} + +// ExtensionFilter ... +func ExtensionFilter(ext string, allowed bool) FilterFunc { + return func(pth string) (bool, error) { + e := filepath.Ext(pth) + return allowed == strings.EqualFold(ext, e), nil + } +} + +// RegexpFilter ... +func RegexpFilter(pattern string, allowed bool) FilterFunc { + return func(pth string) (bool, error) { + re := regexp.MustCompile(pattern) + found := re.FindString(pth) != "" + return allowed == found, nil + } +} + +// ComponentFilter ... +func ComponentFilter(component string, allowed bool) FilterFunc { + return func(pth string) (bool, error) { + found := false + pathComponents := strings.Split(pth, string(filepath.Separator)) + for _, c := range pathComponents { + if c == component { + found = true + } + } + return allowed == found, nil + } +} + +// ComponentWithExtensionFilter ... +func ComponentWithExtensionFilter(ext string, allowed bool) FilterFunc { + return func(pth string) (bool, error) { + found := false + pathComponents := strings.Split(pth, string(filepath.Separator)) + for _, c := range pathComponents { + e := filepath.Ext(c) + if e == ext { + found = true + } + } + return allowed == found, nil + } +} + +// IsDirectoryFilter ... +func IsDirectoryFilter(allowed bool) FilterFunc { + return func(pth string) (bool, error) { + fileInf, err := os.Lstat(pth) + if err != nil { + return false, err + } + if fileInf == nil { + return false, errors.New("no file info available") + } + return allowed == fileInf.IsDir(), nil + } +} + +// InDirectoryFilter ... +func InDirectoryFilter(dir string, allowed bool) FilterFunc { + return func(pth string) (bool, error) { + in := filepath.Dir(pth) == dir + return allowed == in, nil + } +} + +// DirectoryContainsFileFilter returns a FilterFunc that checks if a directory contains a file +func DirectoryContainsFileFilter(fileName string) FilterFunc { + return func(pth string) (bool, error) { + isDir, err := IsDirectoryFilter(true)(pth) + if err != nil { + return false, err + } + if !isDir { + return false, nil + } + + absPath := filepath.Join(pth, fileName) + if _, err := os.Lstat(absPath); err != nil { + if !os.IsNotExist(err) { + return false, err + } + return false, nil + } + return true, nil + } +} + +// FileContainsFilter ... +func FileContainsFilter(pth, str string) (bool, error) { + bytes, err := ioutil.ReadFile(pth) + if err != nil { + return false, err + } + + return strings.Contains(string(bytes), str), nil +} diff --git a/vendor/github.com/bitrise-io/go-utils/pathutil/pathutil.go b/vendor/github.com/bitrise-io/go-utils/pathutil/pathutil.go index a88ff5f..947c97c 100644 --- a/vendor/github.com/bitrise-io/go-utils/pathutil/pathutil.go +++ b/vendor/github.com/bitrise-io/go-utils/pathutil/pathutil.go @@ -10,47 +10,34 @@ import ( "strings" ) -// RevokableChangeDir ... -func RevokableChangeDir(dir string) (func() error, error) { - origDir, err := CurrentWorkingDirectoryAbsolutePath() - if err != nil { - return nil, err - } - - revokeFn := func() error { - return os.Chdir(origDir) +// NormalizedOSTempDirPath ... +// Creates a temp dir, and returns its path. +// If tmpDirNamePrefix is provided it'll be used +// as the tmp dir's name prefix. +// Normalized: it's guaranteed that the path won't end with '/'. +func NormalizedOSTempDirPath(tmpDirNamePrefix string) (retPth string, err error) { + retPth, err = ioutil.TempDir("", tmpDirNamePrefix) + if strings.HasSuffix(retPth, "/") { + retPth = retPth[:len(retPth)-1] } - - return revokeFn, os.Chdir(dir) + return } -// ChangeDirForFunction ... -func ChangeDirForFunction(dir string, fn func()) error { - revokeFn, err := RevokableChangeDir(dir) - if err != nil { - return err - } - - fn() - - return revokeFn() +// CurrentWorkingDirectoryAbsolutePath ... +func CurrentWorkingDirectoryAbsolutePath() (string, error) { + return filepath.Abs("./") } -// IsRelativePath ... -func IsRelativePath(pth string) bool { - if strings.HasPrefix(pth, "./") { - return true - } - - if strings.HasPrefix(pth, "/") { - return false - } - - if strings.HasPrefix(pth, "$") { - return false +// UserHomeDir ... +func UserHomeDir() string { + if runtime.GOOS == "windows" { + home := os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH") + if home == "" { + home = os.Getenv("USERPROFILE") + } + return home } - - return true + return os.Getenv("HOME") } // EnsureDirExist ... @@ -76,12 +63,6 @@ func genericIsPathExists(pth string) (os.FileInfo, bool, error) { return fileInf, false, err } -// IsPathExists ... -func IsPathExists(pth string) (bool, error) { - _, isExists, err := genericIsPathExists(pth) - return isExists, err -} - // PathCheckAndInfos ... // Returns: // 1. file info or nil @@ -106,6 +87,32 @@ func IsDirExists(pth string) (bool, error) { return fileInf.IsDir(), nil } +// IsPathExists ... +func IsPathExists(pth string) (bool, error) { + _, isExists, err := genericIsPathExists(pth) + return isExists, err +} + +// +// Path modifier functions + +// PathModifier ... +type PathModifier interface { + AbsPath(pth string) (string, error) +} + +type defaultPathModifier struct{} + +// NewPathModifier ... +func NewPathModifier() PathModifier { + return defaultPathModifier{} +} + +// AbsPath ... +func (defaultPathModifier) AbsPath(pth string) (string, error) { + return AbsPath(pth) +} + // AbsPath expands ENV vars and the ~ character // then call Go's Abs func AbsPath(pth string) (string, error) { @@ -150,37 +157,65 @@ func ExpandTilde(pth string) (string, error) { return pth, nil } -// CurrentWorkingDirectoryAbsolutePath ... -func CurrentWorkingDirectoryAbsolutePath() (string, error) { - return filepath.Abs("./") -} +// IsRelativePath ... +func IsRelativePath(pth string) bool { + if strings.HasPrefix(pth, "./") { + return true + } -// UserHomeDir ... -func UserHomeDir() string { - if runtime.GOOS == "windows" { - home := os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH") - if home == "" { - home = os.Getenv("USERPROFILE") - } - return home + if strings.HasPrefix(pth, "/") { + return false } - return os.Getenv("HOME") -} -// NormalizedOSTempDirPath ... -// Creates a temp dir, and returns its path. -// If tmpDirNamePrefix is provided it'll be used -// as the tmp dir's name prefix. -// Normalized: it's guaranteed that the path won't end with '/'. -func NormalizedOSTempDirPath(tmpDirNamePrefix string) (retPth string, err error) { - retPth, err = ioutil.TempDir("", tmpDirNamePrefix) - if strings.HasSuffix(retPth, "/") { - retPth = retPth[:len(retPth)-1] + if strings.HasPrefix(pth, "$") { + return false } - return + + return true } // GetFileName returns the name of the file from a given path or the name of the directory if it is a directory func GetFileName(path string) string { return strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)) } + +// EscapeGlobPath escapes a partial path, determined at runtime, used as a parameter for filepath.Glob +func EscapeGlobPath(path string) string { + var escaped string + for _, ch := range path { + if ch == '[' || ch == ']' || ch == '-' || ch == '*' || ch == '?' || ch == '\\' { + escaped += "\\" + } + escaped += string(ch) + } + return escaped +} + +// +// Change dir functions + +// RevokableChangeDir ... +func RevokableChangeDir(dir string) (func() error, error) { + origDir, err := CurrentWorkingDirectoryAbsolutePath() + if err != nil { + return nil, err + } + + revokeFn := func() error { + return os.Chdir(origDir) + } + + return revokeFn, os.Chdir(dir) +} + +// ChangeDirForFunction ... +func ChangeDirForFunction(dir string, fn func()) error { + revokeFn, err := RevokableChangeDir(dir) + if err != nil { + return err + } + + fn() + + return revokeFn() +} diff --git a/vendor/github.com/bitrise-io/go-utils/pathutil/sortable_path.go b/vendor/github.com/bitrise-io/go-utils/pathutil/sortable_path.go new file mode 100644 index 0000000..97f63c3 --- /dev/null +++ b/vendor/github.com/bitrise-io/go-utils/pathutil/sortable_path.go @@ -0,0 +1,119 @@ +package pathutil + +import ( + "os" + "path/filepath" + "sort" + "strings" +) + +// ListPathInDirSortedByComponents ... +func ListPathInDirSortedByComponents(searchDir string, relPath bool) ([]string, error) { + searchDir, err := filepath.Abs(searchDir) + if err != nil { + return []string{}, err + } + + var fileList []string + + if err := filepath.Walk(searchDir, func(path string, _ os.FileInfo, walkErr error) error { + if walkErr != nil { + return walkErr + } + + if relPath { + rel, err := filepath.Rel(searchDir, path) + if err != nil { + return err + } + path = rel + } + + fileList = append(fileList, path) + + return nil + }); err != nil { + return []string{}, err + } + return SortPathsByComponents(fileList) +} + +// SortablePath ... +type SortablePath struct { + Pth string + AbsPth string + Components []string +} + +// NewSortablePath ... +func NewSortablePath(pth string) (SortablePath, error) { + absPth, err := AbsPath(pth) + if err != nil { + return SortablePath{}, err + } + + components := strings.Split(absPth, string(os.PathSeparator)) + fixedComponents := []string{} + for _, comp := range components { + if comp != "" { + fixedComponents = append(fixedComponents, comp) + } + } + + return SortablePath{ + Pth: pth, + AbsPth: absPth, + Components: fixedComponents, + }, nil +} + +// BySortablePathComponents .. +type BySortablePathComponents []SortablePath + +func (s BySortablePathComponents) Len() int { + return len(s) +} +func (s BySortablePathComponents) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} +func (s BySortablePathComponents) Less(i, j int) bool { + path1 := s[i] + path2 := s[j] + + d1 := len(path1.Components) + d2 := len(path2.Components) + + if d1 < d2 { + return true + } else if d1 > d2 { + return false + } + + // if same component size, + // do alphabetic sort based on the last component + base1 := filepath.Base(path1.AbsPth) + base2 := filepath.Base(path2.AbsPth) + + return base1 < base2 +} + +// SortPathsByComponents ... +func SortPathsByComponents(paths []string) ([]string, error) { + sortableFiles := []SortablePath{} + for _, pth := range paths { + sortable, err := NewSortablePath(pth) + if err != nil { + return []string{}, err + } + sortableFiles = append(sortableFiles, sortable) + } + + sort.Sort(BySortablePathComponents(sortableFiles)) + + sortedFiles := []string{} + for _, pth := range sortableFiles { + sortedFiles = append(sortedFiles, pth.Pth) + } + + return sortedFiles, nil +} diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS index bcfa195..52ccb5a 100644 --- a/vendor/github.com/golang/snappy/AUTHORS +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -8,8 +8,11 @@ # Please keep the list sorted. +Amazon.com, Inc Damian Gryski +Eric Buth Google Inc. Jan Mercl <0xjnml@gmail.com> +Klaus Post Rodolfo Carvalho Sebastien Binet diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS index 931ae31..ea6524d 100644 --- a/vendor/github.com/golang/snappy/CONTRIBUTORS +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -26,9 +26,13 @@ # Please keep the list sorted. +Alex Legg Damian Gryski +Eric Buth Jan Mercl <0xjnml@gmail.com> +Jonathan Swinney Kai Backman +Klaus Post Marc-Antoine Ruel Nigel Tao Rob Pike diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go index 72efb03..23c6e26 100644 --- a/vendor/github.com/golang/snappy/decode.go +++ b/vendor/github.com/golang/snappy/decode.go @@ -52,6 +52,8 @@ const ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. func Decode(dst, src []byte) ([]byte, error) { dLen, s, err := decodedLen(src) if err != nil { @@ -83,6 +85,8 @@ func NewReader(r io.Reader) *Reader { } // Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. type Reader struct { r io.Reader err error @@ -114,32 +118,23 @@ func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { return true } -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - for { - if r.i < r.j { - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil - } +func (r *Reader) fill() error { + for r.i >= r.j { if !r.readFull(r.buf[:4], true) { - return 0, r.err + return r.err } chunkType := r.buf[0] if !r.readHeader { if chunkType != chunkTypeStreamIdentifier { r.err = ErrCorrupt - return 0, r.err + return r.err } r.readHeader = true } chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 if chunkLen > len(r.buf) { r.err = ErrUnsupported - return 0, r.err + return r.err } // The chunk types are specified at @@ -149,11 +144,11 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.2. Compressed data (chunk type 0x00). if chunkLen < checksumSize { r.err = ErrCorrupt - return 0, r.err + return r.err } buf := r.buf[:chunkLen] if !r.readFull(buf, false) { - return 0, r.err + return r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 buf = buf[checksumSize:] @@ -161,19 +156,19 @@ func (r *Reader) Read(p []byte) (int, error) { n, err := DecodedLen(buf) if err != nil { r.err = err - return 0, r.err + return r.err } if n > len(r.decoded) { r.err = ErrCorrupt - return 0, r.err + return r.err } if _, err := Decode(r.decoded, buf); err != nil { r.err = err - return 0, r.err + return r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt - return 0, r.err + return r.err } r.i, r.j = 0, n continue @@ -182,25 +177,25 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.3. Uncompressed data (chunk type 0x01). if chunkLen < checksumSize { r.err = ErrCorrupt - return 0, r.err + return r.err } buf := r.buf[:checksumSize] if !r.readFull(buf, false) { - return 0, r.err + return r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 // Read directly into r.decoded instead of via r.buf. n := chunkLen - checksumSize if n > len(r.decoded) { r.err = ErrCorrupt - return 0, r.err + return r.err } if !r.readFull(r.decoded[:n], false) { - return 0, r.err + return r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt - return 0, r.err + return r.err } r.i, r.j = 0, n continue @@ -209,15 +204,15 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.1. Stream identifier (chunk type 0xff). if chunkLen != len(magicBody) { r.err = ErrCorrupt - return 0, r.err + return r.err } if !r.readFull(r.buf[:len(magicBody)], false) { - return 0, r.err + return r.err } for i := 0; i < len(magicBody); i++ { if r.buf[i] != magicBody[i] { r.err = ErrCorrupt - return 0, r.err + return r.err } } continue @@ -226,12 +221,44 @@ func (r *Reader) Read(p []byte) (int, error) { if chunkType <= 0x7f { // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). r.err = ErrUnsupported - return 0, r.err + return r.err } // Section 4.4 Padding (chunk type 0xfe). // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). if !r.readFull(r.buf[:chunkLen], false) { - return 0, r.err + return r.err } } + + return nil +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil +} + +// ReadByte satisfies the io.ByteReader interface. +func (r *Reader) ReadByte() (byte, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + c := r.decoded[r.i] + r.i++ + return c, nil } diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_amd64.go deleted file mode 100644 index fcd192b..0000000 --- a/vendor/github.com/golang/snappy/decode_amd64.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -package snappy - -// decode has the same semantics as in decode_other.go. -// -//go:noescape -func decode(dst, src []byte) int diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_arm64.s similarity index 67% rename from vendor/github.com/klauspost/compress/snappy/decode_amd64.s rename to vendor/github.com/golang/snappy/decode_arm64.s index 1c66e37..7a3ead1 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s +++ b/vendor/github.com/golang/snappy/decode_arm64.s @@ -1,4 +1,4 @@ -// Copyright 2016 The Go Authors. All rights reserved. +// Copyright 2020 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -15,12 +15,12 @@ // // All local variables fit into registers. The non-zero stack size is only to // spill registers and push args when issuing a CALL. The register allocation: -// - AX scratch -// - BX scratch -// - CX length or x -// - DX offset -// - SI &src[s] -// - DI &dst[d] +// - R2 scratch +// - R3 scratch +// - R4 length or x +// - R5 offset +// - R6 &src[s] +// - R7 &dst[d] // + R8 dst_base // + R9 dst_len // + R10 dst_base + dst_len @@ -33,34 +33,35 @@ // The registers R8-R13 (marked with a "+") are set at the start of the // function, and after a CALL returns, and are not otherwise modified. // -// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. -// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. -TEXT ·decode(SB), NOSPLIT, $48-56 - // Initialize SI, DI and R8-R13. - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, DI - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, SI - MOVQ R11, R13 - ADDQ R12, R13 +// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7. +// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6. +TEXT ·decode(SB), NOSPLIT, $56-56 + // Initialize R6, R7 and R8-R13. + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R7 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R6 + MOVD R11, R13 + ADD R12, R13, R13 loop: // for s < len(src) - CMPQ SI, R13 - JEQ end + CMP R13, R6 + BEQ end - // CX = uint32(src[s]) + // R4 = uint32(src[s]) // // switch src[s] & 0x03 - MOVBLZX (SI), CX - MOVL CX, BX - ANDL $3, BX - CMPL BX, $1 - JAE tagCopy + MOVBU (R6), R4 + MOVW R4, R3 + ANDW $3, R3 + MOVW $1, R1 + CMPW R1, R3 + BGE tagCopy // ---------------------------------------- // The code below handles literal tags. @@ -68,35 +69,36 @@ loop: // case tagLiteral: // x := uint32(src[s] >> 2) // switch - SHRL $2, CX - CMPL CX, $60 - JAE tagLit60Plus + MOVW $60, R1 + LSRW $2, R4, R4 + CMPW R4, R1 + BLS tagLit60Plus // case x < 60: // s++ - INCQ SI + ADD $1, R6, R6 doLit: // This is the end of the inner "switch", when we have a literal tag. // - // We assume that CX == x and x fits in a uint32, where x is the variable + // We assume that R4 == x and x fits in a uint32, where x is the variable // used in the pure Go decode_other.go code. // length = int(x) + 1 // // Unlike the pure Go code, we don't need to check if length <= 0 because - // CX can hold 64 bits, so the increment cannot overflow. - INCQ CX + // R4 can hold 64 bits, so the increment cannot overflow. + ADD $1, R4, R4 // Prepare to check if copying length bytes will run past the end of dst or // src. // - // AX = len(dst) - d - // BX = len(src) - s - MOVQ R10, AX - SUBQ DI, AX - MOVQ R13, BX - SUBQ SI, BX + // R2 = len(dst) - d + // R3 = len(src) - s + MOVD R10, R2 + SUB R7, R2, R2 + MOVD R13, R3 + SUB R6, R3, R3 // !!! Try a faster technique for short (16 or fewer bytes) copies. // @@ -109,12 +111,12 @@ doLit: // is contiguous in memory and so it needs to leave enough source bytes to // read the next tag without refilling buffers, but Go's Decode assumes // contiguousness (the src argument is a []byte). - CMPQ CX, $16 - JGT callMemmove - CMPQ AX, $16 - JLT callMemmove - CMPQ BX, $16 - JLT callMemmove + CMP $16, R4 + BGT callMemmove + CMP $16, R2 + BLT callMemmove + CMP $16, R3 + BLT callMemmove // !!! Implement the copy from src to dst as a 16-byte load and store. // (Decode's documentation says that dst and src must not overlap.) @@ -124,57 +126,57 @@ doLit: // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a // non-nil error), so the overrun will be ignored. // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or // 16-byte loads and stores. This technique probably wouldn't be as // effective on architectures that are fussier about alignment. - MOVOU 0(SI), X0 - MOVOU X0, 0(DI) + LDP 0(R6), (R14, R15) + STP (R14, R15), 0(R7) // d += length // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop callMemmove: // if length > len(dst)-d || length > len(src)-s { etc } - CMPQ CX, AX - JGT errCorrupt - CMPQ CX, BX - JGT errCorrupt + CMP R2, R4 + BGT errCorrupt + CMP R3, R4 + BGT errCorrupt // copy(dst[d:], src[s:s+length]) // // This means calling runtime·memmove(&dst[d], &src[s], length), so we push - // DI, SI and CX as arguments. Coincidentally, we also need to spill those + // R7, R6 and R4 as arguments. Coincidentally, we also need to spill those // three registers to the stack, to save local variables across the CALL. - MOVQ DI, 0(SP) - MOVQ SI, 8(SP) - MOVQ CX, 16(SP) - MOVQ DI, 24(SP) - MOVQ SI, 32(SP) - MOVQ CX, 40(SP) + MOVD R7, 8(RSP) + MOVD R6, 16(RSP) + MOVD R4, 24(RSP) + MOVD R7, 32(RSP) + MOVD R6, 40(RSP) + MOVD R4, 48(RSP) CALL runtime·memmove(SB) // Restore local variables: unspill registers from the stack and // re-calculate R8-R13. - MOVQ 24(SP), DI - MOVQ 32(SP), SI - MOVQ 40(SP), CX - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, R13 - ADDQ R12, R13 + MOVD 32(RSP), R7 + MOVD 40(RSP), R6 + MOVD 48(RSP), R4 + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R13 + ADD R12, R13, R13 // d += length // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop tagLit60Plus: // !!! This fragment does the @@ -182,142 +184,150 @@ tagLit60Plus: // s += x - 58; if uint(s) > uint(len(src)) { etc } // // checks. In the asm version, we code it once instead of once per switch case. - ADDQ CX, SI - SUBQ $58, SI - CMPQ SI, R13 - JA errCorrupt + ADD R4, R6, R6 + SUB $58, R6, R6 + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt // case x == 60: - CMPL CX, $61 - JEQ tagLit61 - JA tagLit62Plus + MOVW $61, R1 + CMPW R1, R4 + BEQ tagLit61 + BGT tagLit62Plus // x = uint32(src[s-1]) - MOVBLZX -1(SI), CX - JMP doLit + MOVBU -1(R6), R4 + B doLit tagLit61: // case x == 61: // x = uint32(src[s-2]) | uint32(src[s-1])<<8 - MOVWLZX -2(SI), CX - JMP doLit + MOVHU -2(R6), R4 + B doLit tagLit62Plus: - CMPL CX, $62 - JA tagLit63 + CMPW $62, R4 + BHI tagLit63 // case x == 62: // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - MOVWLZX -3(SI), CX - MOVBLZX -1(SI), BX - SHLL $16, BX - ORL BX, CX - JMP doLit + MOVHU -3(R6), R4 + MOVBU -1(R6), R3 + ORR R3<<16, R4 + B doLit tagLit63: // case x == 63: // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - MOVL -4(SI), CX - JMP doLit + MOVWU -4(R6), R4 + B doLit -// The code above handles literal tags. -// ---------------------------------------- -// The code below handles copy tags. + // The code above handles literal tags. + // ---------------------------------------- + // The code below handles copy tags. tagCopy4: // case tagCopy4: // s += 5 - ADDQ $5, SI + ADD $5, R6, R6 // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 - JA errCorrupt + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt // length = 1 + int(src[s-5])>>2 - SHRQ $2, CX - INCQ CX + MOVD $1, R1 + ADD R4>>2, R1, R4 // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - MOVLQZX -4(SI), DX - JMP doCopy + MOVWU -4(R6), R5 + B doCopy tagCopy2: // case tagCopy2: // s += 3 - ADDQ $3, SI + ADD $3, R6, R6 // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 - JA errCorrupt + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt // length = 1 + int(src[s-3])>>2 - SHRQ $2, CX - INCQ CX + MOVD $1, R1 + ADD R4>>2, R1, R4 // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - MOVWQZX -2(SI), DX - JMP doCopy + MOVHU -2(R6), R5 + B doCopy tagCopy: // We have a copy tag. We assume that: - // - BX == src[s] & 0x03 - // - CX == src[s] - CMPQ BX, $2 - JEQ tagCopy2 - JA tagCopy4 + // - R3 == src[s] & 0x03 + // - R4 == src[s] + CMP $2, R3 + BEQ tagCopy2 + BGT tagCopy4 // case tagCopy1: // s += 2 - ADDQ $2, SI + ADD $2, R6, R6 // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 - JA errCorrupt + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - MOVQ CX, DX - ANDQ $0xe0, DX - SHLQ $3, DX - MOVBQZX -1(SI), BX - ORQ BX, DX + MOVD R4, R5 + AND $0xe0, R5 + MOVBU -1(R6), R3 + ORR R5<<3, R3, R5 // length = 4 + int(src[s-2])>>2&0x7 - SHRQ $2, CX - ANDQ $7, CX - ADDQ $4, CX + MOVD $7, R1 + AND R4>>2, R1, R4 + ADD $4, R4, R4 doCopy: // This is the end of the outer "switch", when we have a copy tag. // // We assume that: - // - CX == length && CX > 0 - // - DX == offset + // - R4 == length && R4 > 0 + // - R5 == offset // if offset <= 0 { etc } - CMPQ DX, $0 - JLE errCorrupt + MOVD $0, R1 + CMP R1, R5 + BLE errCorrupt // if d < offset { etc } - MOVQ DI, BX - SUBQ R8, BX - CMPQ BX, DX - JLT errCorrupt + MOVD R7, R3 + SUB R8, R3, R3 + CMP R5, R3 + BLT errCorrupt // if length > len(dst)-d { etc } - MOVQ R10, BX - SUBQ DI, BX - CMPQ CX, BX - JGT errCorrupt + MOVD R10, R3 + SUB R7, R3, R3 + CMP R3, R4 + BGT errCorrupt // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length // // Set: // - R14 = len(dst)-d // - R15 = &dst[d-offset] - MOVQ R10, R14 - SUBQ DI, R14 - MOVQ DI, R15 - SUBQ DX, R15 + MOVD R10, R14 + SUB R7, R14, R14 + MOVD R7, R15 + SUB R5, R15, R15 // !!! Try a faster technique for short (16 or fewer bytes) forward copies. // @@ -332,18 +342,18 @@ doCopy: // } // copy 16 bytes // d += length - CMPQ CX, $16 - JGT slowForwardCopy - CMPQ DX, $8 - JLT slowForwardCopy - CMPQ R14, $16 - JLT slowForwardCopy - MOVQ 0(R15), AX - MOVQ AX, 0(DI) - MOVQ 8(R15), BX - MOVQ BX, 8(DI) - ADDQ CX, DI - JMP loop + CMP $16, R4 + BGT slowForwardCopy + CMP $8, R5 + BLT slowForwardCopy + CMP $16, R14 + BLT slowForwardCopy + MOVD 0(R15), R2 + MOVD R2, 0(R7) + MOVD 8(R15), R3 + MOVD R3, 8(R7) + ADD R4, R7, R7 + B loop slowForwardCopy: // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we @@ -394,9 +404,9 @@ slowForwardCopy: // if length > len(dst)-d-10 { // goto verySlowForwardCopy // } - SUBQ $10, R14 - CMPQ CX, R14 - JGT verySlowForwardCopy + SUB $10, R14, R14 + CMP R14, R4 + BGT verySlowForwardCopy makeOffsetAtLeast8: // !!! As above, expand the pattern so that offset >= 8 and we can use @@ -410,36 +420,37 @@ makeOffsetAtLeast8: // // The two previous lines together means that d-offset, and therefore // // R15, is unchanged. // } - CMPQ DX, $8 - JGE fixUpSlowForwardCopy - MOVQ (R15), BX - MOVQ BX, (DI) - SUBQ DX, CX - ADDQ DX, DI - ADDQ DX, DX - JMP makeOffsetAtLeast8 + CMP $8, R5 + BGE fixUpSlowForwardCopy + MOVD (R15), R3 + MOVD R3, (R7) + SUB R5, R4, R4 + ADD R5, R7, R7 + ADD R5, R5, R5 + B makeOffsetAtLeast8 fixUpSlowForwardCopy: - // !!! Add length (which might be negative now) to d (implied by DI being + // !!! Add length (which might be negative now) to d (implied by R7 being // &dst[d]) so that d ends up at the right place when we jump back to the - // top of the loop. Before we do that, though, we save DI to AX so that, if + // top of the loop. Before we do that, though, we save R7 to R2 so that, if // length is positive, copying the remaining length bytes will write to the // right place. - MOVQ DI, AX - ADDQ CX, DI + MOVD R7, R2 + ADD R4, R7, R7 finishSlowForwardCopy: // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative // length means that we overrun, but as above, that will be fixed up by // subsequent iterations of the outermost loop. - CMPQ CX, $0 - JLE loop - MOVQ (R15), BX - MOVQ BX, (AX) - ADDQ $8, R15 - ADDQ $8, AX - SUBQ $8, CX - JMP finishSlowForwardCopy + MOVD $0, R1 + CMP R1, R4 + BLE loop + MOVD (R15), R3 + MOVD R3, (R2) + ADD $8, R15, R15 + ADD $8, R2, R2 + SUB $8, R4, R4 + B finishSlowForwardCopy verySlowForwardCopy: // verySlowForwardCopy is a simple implementation of forward copy. In C @@ -454,29 +465,30 @@ verySlowForwardCopy: // break // } // } - MOVB (R15), BX - MOVB BX, (DI) - INCQ R15 - INCQ DI - DECQ CX - JNZ verySlowForwardCopy - JMP loop - -// The code above handles copy tags. -// ---------------------------------------- + MOVB (R15), R3 + MOVB R3, (R7) + ADD $1, R15, R15 + ADD $1, R7, R7 + SUB $1, R4, R4 + CBNZ R4, verySlowForwardCopy + B loop + + // The code above handles copy tags. + // ---------------------------------------- end: // This is the end of the "for s < len(src)". // // if d != len(dst) { etc } - CMPQ DI, R10 - JNE errCorrupt + CMP R10, R7 + BNE errCorrupt // return 0 - MOVQ $0, ret+48(FP) + MOVD $0, ret+48(FP) RET errCorrupt: // return decodeErrCodeCorrupt - MOVQ $1, ret+48(FP) + MOVD $1, R2 + MOVD R2, ret+48(FP) RET diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_asm.go similarity index 93% rename from vendor/github.com/klauspost/compress/snappy/decode_amd64.go rename to vendor/github.com/golang/snappy/decode_asm.go index fcd192b..7082b34 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go +++ b/vendor/github.com/golang/snappy/decode_asm.go @@ -5,6 +5,7 @@ // +build !appengine // +build gc // +build !noasm +// +build amd64 arm64 package snappy diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go index 8c9f204..2f672be 100644 --- a/vendor/github.com/golang/snappy/decode_other.go +++ b/vendor/github.com/golang/snappy/decode_other.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm +// +build !amd64,!arm64 appengine !gc noasm package snappy @@ -85,14 +85,28 @@ func decode(dst, src []byte) int { if offset <= 0 || d < offset || length > len(dst)-d { return decodeErrCodeCorrupt } - // Copy from an earlier sub-slice of dst to a later sub-slice. Unlike - // the built-in copy function, this byte-by-byte copy always runs + // Copy from an earlier sub-slice of dst to a later sub-slice. + // If no overlap, use the built-in copy: + if offset >= length { + copy(dst[d:d+length], dst[d-offset:]) + d += length + continue + } + + // Unlike the built-in copy function, this byte-by-byte copy always runs // forwards, even if the slices overlap. Conceptually, this is: // // d += forwardCopy(dst[d:d+length], dst[d-offset:]) - for end := d + length; d != end; d++ { - dst[d] = dst[d-offset] + // + // We align the slices into a and b and show the compiler they are the same size. + // This allows the loop to run without bounds checks. + a := dst[d : d+length] + b := dst[d-offset:] + b = b[:len(a)] + for i := range a { + a[i] = b[i] } + d += length } if d != len(dst) { return decodeErrCodeCorrupt diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go index 8d393e9..7f23657 100644 --- a/vendor/github.com/golang/snappy/encode.go +++ b/vendor/github.com/golang/snappy/encode.go @@ -15,6 +15,8 @@ import ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. func Encode(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) @@ -139,6 +141,8 @@ func NewBufferedWriter(w io.Writer) *Writer { } // Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. type Writer struct { w io.Writer err error diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s new file mode 100644 index 0000000..f8d54ad --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_arm64.s @@ -0,0 +1,722 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - R3 len(lit) +// - R4 n +// - R6 return value +// - R8 &dst[i] +// - R10 &lit[0] +// +// The 32 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $32-56 + MOVD dst_base+0(FP), R8 + MOVD lit_base+24(FP), R10 + MOVD lit_len+32(FP), R3 + MOVD R3, R6 + MOVW R3, R4 + SUBW $1, R4, R4 + + CMPW $60, R4 + BLT oneByte + CMPW $256, R4 + BLT twoBytes + +threeBytes: + MOVD $0xf4, R2 + MOVB R2, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + ADD $3, R6, R6 + B memmove + +twoBytes: + MOVD $0xf0, R2 + MOVB R2, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + ADD $2, R6, R6 + B memmove + +oneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + ADD $1, R6, R6 + +memmove: + MOVD R6, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - R3 length +// - R7 &dst[0] +// - R8 &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVD dst_base+0(FP), R8 + MOVD R8, R7 + MOVD offset+24(FP), R11 + MOVD length+32(FP), R3 + +loop0: + // for length >= 68 { etc } + CMPW $68, R3 + BLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $64, R3, R3 + B loop0 + +step1: + // if length > 64 { etc } + CMP $64, R3 + BLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $60, R3, R3 + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMP $12, R3 + BGE step3 + CMPW $2048, R11 + BGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $3, R11, R11 + AND $0xe0, R11, R11 + SUB $4, R3, R3 + LSLW $2, R3 + AND $0xff, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUB $1, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - R6 &src[0] +// - R7 &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVD src_base+0(FP), R6 + MOVD src_len+8(FP), R14 + MOVD i+24(FP), R15 + MOVD j+32(FP), R7 + ADD R6, R14, R14 + ADD R6, R15, R15 + ADD R6, R7, R7 + MOVD R14, R13 + SUB $8, R13, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI cmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE bsf + ADD $8, R15, R15 + ADD $8, R7, R7 + B cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS extendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE extendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - R3 . . +// - R4 . . +// - R5 64 shift +// - R6 72 &src[0], tableSize +// - R7 80 &src[s] +// - R8 88 &dst[d] +// - R9 96 sLimit +// - R10 . &src[nextEmit] +// - R11 104 prevHash, currHash, nextHash, offset +// - R12 112 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 120 candidate +// - R16 . hash constant, 0x1e35a7bd +// - R17 . &table +// - . 128 table +// +// The second column (64, 72, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 64 + 64 = 32896. +TEXT ·encodeBlock(SB), 0, $32896-56 + MOVD dst_base+0(FP), R8 + MOVD src_base+24(FP), R7 + MOVD src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVD $24, R5 + MOVD $256, R6 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + MOVD $16384, R2 + CMP R2, R6 + BGE varTable + CMP R14, R6 + BGE varTable + SUB $1, R5, R5 + LSL $1, R6, R6 + B calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each + // iterations writes 64 bytes, so we can do only tableSize/32 writes + // instead of the 2048 writes that would zero-initialize all of table's + // 32768 bytes. This clear could overrun the first tableSize elements, but + // it won't overrun the allocated stack size. + ADD $128, RSP, R17 + MOVD R17, R4 + + // !!! R6 = &src[tableSize] + ADD R6<<1, R17, R6 + +memclr: + STP.P (ZR, ZR), 64(R4) + STP (ZR, ZR), -48(R4) + STP (ZR, ZR), -32(R4) + STP (ZR, ZR), -16(R4) + CMP R4, R6 + BHI memclr + + // !!! R6 = &src[0] + MOVD R7, R6 + + // sLimit := len(src) - inputMargin + MOVD R14, R9 + SUB $15, R9, R9 + + // !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't + // change for the rest of the function. + MOVD R5, 64(RSP) + MOVD R6, 72(RSP) + MOVD R9, 96(RSP) + + // nextEmit := 0 + MOVD R6, R10 + + // s := 1 + ADD $1, R7, R7 + + // nextHash := hash(load32(src, s), shift) + MOVW 0(R7), R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + +outer: + // for { etc } + + // skip := 32 + MOVD $32, R12 + + // nextS := s + MOVD R7, R13 + + // candidate := 0 + MOVD $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVD R13, R7 + + // bytesBetweenHashLookups := skip >> 5 + MOVD R12, R14 + LSR $5, R14, R14 + + // nextS = s + bytesBetweenHashLookups + ADD R14, R13, R13 + + // skip += bytesBetweenHashLookups + ADD R14, R12, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVD R13, R3 + SUB R6, R3, R3 + CMP R9, R3 + BHI emitRemainder + + // candidate = int(table[nextHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[nextHash] = uint16(s) + MOVD R7, R3 + SUB R6, R3, R3 + + MOVH R3, 0(R17)(R11<<1) + + // nextHash = hash(load32(src, nextS), shift) + MOVW 0(R13), R11 + MULW R16, R11 + LSRW R5, R11, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVW 0(R7), R3 + MOVW (R6)(R15), R4 + CMPW R4, R3 + BNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVD R7, R3 + SUB R10, R3, R3 + CMP $16, R3 + BLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVW R3, R4 + SUBW $1, R4, R4 + + MOVW $60, R2 + CMPW R2, R4 + BLT inlineEmitLiteralOneByte + MOVW $256, R2 + CMPW R2, R4 + BLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVD $0xf4, R1 + MOVB R1, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVD $0xf0, R1 + MOVB R1, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADD R3, R8, R8 + MOVD R7, 80(RSP) + MOVD R8, 88(RSP) + MOVD R15, 120(RSP) + CALL runtime·memmove(SB) + MOVD 64(RSP), R5 + MOVD 72(RSP), R6 + MOVD 80(RSP), R7 + MOVD 88(RSP), R8 + MOVD 96(RSP), R9 + MOVD 120(RSP), R15 + ADD $128, RSP, R17 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + B inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB R3, R4 + SUBW $1, R4, R4 + AND $0xff, R4, R4 + LSLW $2, R4, R4 + MOVB R4, (R8) + ADD $1, R8, R8 + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R10), (R0, R1) + STP (R0, R1), 0(R8) + ADD R3, R8, R8 + +inner1: + // for { etc } + + // base := s + MOVD R7, R12 + + // !!! offset := base - candidate + MOVD R12, R11 + SUB R15, R11, R11 + SUB R6, R11, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVD src_len+32(FP), R14 + ADD R6, R14, R14 + + // !!! R13 = &src[len(src) - 8] + MOVD R14, R13 + SUB $8, R13, R13 + + // !!! R15 = &src[candidate + 4] + ADD $4, R15, R15 + ADD R6, R15, R15 + + // !!! s += 4 + ADD $4, R7, R7 + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI inlineExtendMatchCmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchBSF + ADD $8, R15, R15 + ADD $8, R7, R7 + B inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + B inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS inlineExtendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVD R7, R3 + SUB R12, R3, R3 + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + MOVW $68, R2 + CMPW R2, R3 + BLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $64, R3, R3 + B inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + MOVW $64, R2 + CMPW R2, R3 + BLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $60, R3, R3 + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + MOVW $12, R2 + CMPW R2, R3 + BGE inlineEmitCopyStep3 + MOVW $2048, R2 + CMPW R2, R11 + BGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $8, R11, R11 + LSLW $5, R11, R11 + SUBW $4, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + B inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBW $1, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVD R7, R10 + + // if s >= sLimit { goto emitRemainder } + MOVD R7, R3 + SUB R6, R3, R3 + CMP R3, R9 + BLS emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. + + // x := load64(src, s-1) + MOVD -1(R7), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // table[prevHash] = uint16(s-1) + MOVD R7, R3 + SUB R6, R3, R3 + SUB $1, R3, R3 + + MOVHU R3, 0(R17)(R11<<1) + + // currHash := hash(uint32(x>>8), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // candidate = int(table[currHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[currHash] = uint16(s) + ADD $1, R3, R3 + MOVHU R3, 0(R17)(R11<<1) + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVW (R6)(R15), R4 + CMPW R4, R14 + BEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // s++ + ADD $1, R7, R7 + + // break out of the inner1 for loop, i.e. continue the outer loop. + B outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVD src_len+32(FP), R3 + ADD R6, R3, R3 + CMP R3, R10 + BEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVD R8, 8(RSP) + MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD R10, 32(RSP) + SUB R10, R3, R3 + MOVD R3, 40(RSP) + MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVD R8, 88(RSP) + CALL ·emitLiteral(SB) + MOVD 88(RSP), R8 + + // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVD 56(RSP), R1 + ADD R1, R8, R8 + +encodeBlockEnd: + MOVD dst_base+0(FP), R3 + SUB R3, R8, R8 + MOVD R8, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_asm.go similarity index 97% rename from vendor/github.com/golang/snappy/encode_amd64.go rename to vendor/github.com/golang/snappy/encode_asm.go index 150d91b..107c1e7 100644 --- a/vendor/github.com/golang/snappy/encode_amd64.go +++ b/vendor/github.com/golang/snappy/encode_asm.go @@ -5,6 +5,7 @@ // +build !appengine // +build gc // +build !noasm +// +build amd64 arm64 package snappy diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go index dbcae90..296d7f0 100644 --- a/vendor/github.com/golang/snappy/encode_other.go +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm +// +build !amd64,!arm64 appengine !gc noasm package snappy diff --git a/vendor/github.com/klauspost/compress/.gitattributes b/vendor/github.com/klauspost/compress/.gitattributes new file mode 100644 index 0000000..4024335 --- /dev/null +++ b/vendor/github.com/klauspost/compress/.gitattributes @@ -0,0 +1,2 @@ +* -text +*.bin -text -diff diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore new file mode 100644 index 0000000..d31b378 --- /dev/null +++ b/vendor/github.com/klauspost/compress/.gitignore @@ -0,0 +1,32 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof +/s2/cmd/_s2sx/sfx-exe + +# Linux perf files +perf.data +perf.data.old + +# gdb history +.gdb_history diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml new file mode 100644 index 0000000..0af08e6 --- /dev/null +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -0,0 +1,141 @@ +# This is an example goreleaser.yaml file with some sane defaults. +# Make sure to check the documentation at http://goreleaser.com +before: + hooks: + - ./gen.sh + - go install mvdan.cc/garble@latest + +builds: + - + id: "s2c" + binary: s2c + main: ./s2/cmd/s2c/main.go + flags: + - -trimpath + env: + - CGO_ENABLED=0 + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm + - arm64 + - ppc64 + - ppc64le + - mips64 + - mips64le + goarm: + - 7 + gobinary: garble + - + id: "s2d" + binary: s2d + main: ./s2/cmd/s2d/main.go + flags: + - -trimpath + env: + - CGO_ENABLED=0 + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm + - arm64 + - ppc64 + - ppc64le + - mips64 + - mips64le + goarm: + - 7 + gobinary: garble + - + id: "s2sx" + binary: s2sx + main: ./s2/cmd/_s2sx/main.go + flags: + - -modfile=s2sx.mod + - -trimpath + env: + - CGO_ENABLED=0 + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm + - arm64 + - ppc64 + - ppc64le + - mips64 + - mips64le + goarm: + - 7 + gobinary: garble + +archives: + - + id: s2-binaries + name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}" + replacements: + aix: AIX + darwin: OSX + linux: Linux + windows: Windows + 386: i386 + amd64: x86_64 + freebsd: FreeBSD + netbsd: NetBSD + format_overrides: + - goos: windows + format: zip + files: + - unpack/* + - s2/LICENSE + - s2/README.md +checksum: + name_template: 'checksums.txt' +snapshot: + name_template: "{{ .Tag }}-next" +changelog: + sort: asc + filters: + exclude: + - '^doc:' + - '^docs:' + - '^test:' + - '^tests:' + - '^Update\sREADME.md' + +nfpms: + - + file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}" + vendor: Klaus Post + homepage: https://github.com/klauspost/compress + maintainer: Klaus Post + description: S2 Compression Tool + license: BSD 3-Clause + formats: + - deb + - rpm + replacements: + darwin: Darwin + linux: Linux + freebsd: FreeBSD + amd64: x86_64 diff --git a/vendor/github.com/klauspost/compress/LICENSE b/vendor/github.com/klauspost/compress/LICENSE index 1eb75ef..87d5574 100644 --- a/vendor/github.com/klauspost/compress/LICENSE +++ b/vendor/github.com/klauspost/compress/LICENSE @@ -26,3 +26,279 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------ + +Files: gzhttp/* + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2016-2017 The New York Times Company + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +------------------ + +Files: s2/cmd/internal/readahead/* + +The MIT License (MIT) + +Copyright (c) 2015 Klaus Post + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--------------------- +Files: snappy/* +Files: internal/snapref/* + +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +----------------- + +Files: s2/cmd/internal/filepathx/* + +Copyright 2016 The filepathx Authors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md new file mode 100644 index 0000000..c3ec9d8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/README.md @@ -0,0 +1,506 @@ +# compress + +This package provides various compression algorithms. + +* [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression in pure Go. +* [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) is a high performance replacement for Snappy. +* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib). +* [snappy](https://github.com/klauspost/compress/tree/master/snappy) is a drop-in replacement for `github.com/golang/snappy` offering better compression and concurrent streams. +* [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding. +* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently. +* [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation. +* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here. + +[![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories) +[![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml) +[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge) + +# changelog + +* May 5, 2022 (v1.15.3) + * zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572) + * s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575) + +* Apr 26, 2022 (v1.15.2) + * zstd: Add x86-64 assembly for decompression on streams and blocks. Contributed by [@WojciechMula](https://github.com/WojciechMula). Typically 2x faster. [#528](https://github.com/klauspost/compress/pull/528) [#531](https://github.com/klauspost/compress/pull/531) [#545](https://github.com/klauspost/compress/pull/545) [#537](https://github.com/klauspost/compress/pull/537) + * zstd: Add options to ZipDecompressor and fixes [#539](https://github.com/klauspost/compress/pull/539) + * s2: Use sorted search for index [#555](https://github.com/klauspost/compress/pull/555) + * Minimum version is Go 1.16, added CI test on 1.18. + +* Mar 11, 2022 (v1.15.1) + * huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512) + * zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514) + * zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520) + * zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521) + * zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523) + +* Mar 3, 2022 (v1.15.0) + * zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498) + * zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505) + * huff0: Prevent single blocks exceeding 16 bits by @klauspost in[#507](https://github.com/klauspost/compress/pull/507) + * flate: Inline literal emission by @klauspost in [#509](https://github.com/klauspost/compress/pull/509) + * gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400) + * gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510) + +
+ See Details +Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines. + +Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected. + +While the release has been extensively tested, it is recommended to testing when upgrading. +
+ +* Feb 22, 2022 (v1.14.4) + * flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503) + * zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502) + * zip: don't read data descriptor early by @saracen in [#501](https://github.com/klauspost/compress/pull/501) #501 + * huff0: Use static decompression buffer up to 30% faster by @klauspost in [#499](https://github.com/klauspost/compress/pull/499) [#500](https://github.com/klauspost/compress/pull/500) + +* Feb 17, 2022 (v1.14.3) + * flate: Improve fastest levels compression speed ~10% more throughput. [#482](https://github.com/klauspost/compress/pull/482) [#489](https://github.com/klauspost/compress/pull/489) [#490](https://github.com/klauspost/compress/pull/490) [#491](https://github.com/klauspost/compress/pull/491) [#494](https://github.com/klauspost/compress/pull/494) [#478](https://github.com/klauspost/compress/pull/478) + * flate: Faster decompression speed, ~5-10%. [#483](https://github.com/klauspost/compress/pull/483) + * s2: Faster compression with Go v1.18 and amd64 microarch level 3+. [#484](https://github.com/klauspost/compress/pull/484) [#486](https://github.com/klauspost/compress/pull/486) + +* Jan 25, 2022 (v1.14.2) + * zstd: improve header decoder by @dsnet [#476](https://github.com/klauspost/compress/pull/476) + * zstd: Add bigger default blocks [#469](https://github.com/klauspost/compress/pull/469) + * zstd: Remove unused decompression buffer [#470](https://github.com/klauspost/compress/pull/470) + * zstd: Fix logically dead code by @ningmingxiao [#472](https://github.com/klauspost/compress/pull/472) + * flate: Improve level 7-9 [#471](https://github.com/klauspost/compress/pull/471) [#473](https://github.com/klauspost/compress/pull/473) + * zstd: Add noasm tag for xxhash [#475](https://github.com/klauspost/compress/pull/475) + +* Jan 11, 2022 (v1.14.1) + * s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462) + * flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458) + * zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468) + * zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464) + * Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445) + +* Aug 30, 2021 (v1.13.5) + * gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425) + * s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413) + * zstd: pooledZipWriter should return Writers to the same pool [#426](https://github.com/klauspost/compress/pull/426) + * Removed golang/snappy as external dependency for tests [#421](https://github.com/klauspost/compress/pull/421) + +* Aug 12, 2021 (v1.13.4) + * Add [snappy replacement package](https://github.com/klauspost/compress/tree/master/snappy). + * zstd: Fix incorrect encoding in "best" mode [#415](https://github.com/klauspost/compress/pull/415) + +* Aug 3, 2021 (v1.13.3) + * zstd: Improve Best compression [#404](https://github.com/klauspost/compress/pull/404) + * zstd: Fix WriteTo error forwarding [#411](https://github.com/klauspost/compress/pull/411) + * gzhttp: Return http.HandlerFunc instead of http.Handler. Unlikely breaking change. [#406](https://github.com/klauspost/compress/pull/406) + * s2sx: Fix max size error [#399](https://github.com/klauspost/compress/pull/399) + * zstd: Add optional stream content size on reset [#401](https://github.com/klauspost/compress/pull/401) + * zstd: use SpeedBestCompression for level >= 10 [#410](https://github.com/klauspost/compress/pull/410) + +* Jun 14, 2021 (v1.13.1) + * s2: Add full Snappy output support [#396](https://github.com/klauspost/compress/pull/396) + * zstd: Add configurable [Decoder window](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithDecoderMaxWindow) size [#394](https://github.com/klauspost/compress/pull/394) + * gzhttp: Add header to skip compression [#389](https://github.com/klauspost/compress/pull/389) + * s2: Improve speed with bigger output margin [#395](https://github.com/klauspost/compress/pull/395) + +* Jun 3, 2021 (v1.13.0) + * Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors. + * zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382) + * zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380) + +
+ See changes to v1.12.x + +* May 25, 2021 (v1.12.3) + * deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374) + * deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375) + * zstd: Forward read errors [#373](https://github.com/klauspost/compress/pull/373) + +* Apr 27, 2021 (v1.12.2) + * zstd: Improve better/best compression [#360](https://github.com/klauspost/compress/pull/360) [#364](https://github.com/klauspost/compress/pull/364) [#365](https://github.com/klauspost/compress/pull/365) + * zstd: Add helpers to compress/decompress zstd inside zip files [#363](https://github.com/klauspost/compress/pull/363) + * deflate: Improve level 5+6 compression [#367](https://github.com/klauspost/compress/pull/367) + * s2: Improve better/best compression [#358](https://github.com/klauspost/compress/pull/358) [#359](https://github.com/klauspost/compress/pull/358) + * s2: Load after checking src limit on amd64. [#362](https://github.com/klauspost/compress/pull/362) + * s2sx: Limit max executable size [#368](https://github.com/klauspost/compress/pull/368) + +* Apr 14, 2021 (v1.12.1) + * snappy package removed. Upstream added as dependency. + * s2: Better compression in "best" mode [#353](https://github.com/klauspost/compress/pull/353) + * s2sx: Add stdin input and detect pre-compressed from signature [#352](https://github.com/klauspost/compress/pull/352) + * s2c/s2d: Add http as possible input [#348](https://github.com/klauspost/compress/pull/348) + * s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352) + * zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346) + * s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349) +
+ +
+ See changes to v1.11.x + +* Mar 26, 2021 (v1.11.13) + * zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345) + * zstd: Add [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) encoder option [#336](https://github.com/klauspost/compress/pull/336) + * deflate: Improve entropy compression [#338](https://github.com/klauspost/compress/pull/338) + * s2: Clean up and minor performance improvement in best [#341](https://github.com/klauspost/compress/pull/341) + +* Mar 5, 2021 (v1.11.12) + * s2: Add `s2sx` binary that creates [self extracting archives](https://github.com/klauspost/compress/tree/master/s2#s2sx-self-extracting-archives). + * s2: Speed up decompression on non-assembly platforms [#328](https://github.com/klauspost/compress/pull/328) + +* Mar 1, 2021 (v1.11.9) + * s2: Add ARM64 decompression assembly. Around 2x output speed. [#324](https://github.com/klauspost/compress/pull/324) + * s2: Improve "better" speed and efficiency. [#325](https://github.com/klauspost/compress/pull/325) + * s2: Fix binaries. + +* Feb 25, 2021 (v1.11.8) + * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended. + * s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315) + * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) + * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) + * zip: Fix zip64 headers. [#313](https://github.com/klauspost/compress/pull/313) + +* Jan 14, 2021 (v1.11.7) + * Use Bytes() interface to get bytes across packages. [#309](https://github.com/klauspost/compress/pull/309) + * s2: Add 'best' compression option. [#310](https://github.com/klauspost/compress/pull/310) + * s2: Add ReaderMaxBlockSize, changes `s2.NewReader` signature to include varargs. [#311](https://github.com/klauspost/compress/pull/311) + * s2: Fix crash on small better buffers. [#308](https://github.com/klauspost/compress/pull/308) + * s2: Clean up decoder. [#312](https://github.com/klauspost/compress/pull/312) + +* Jan 7, 2021 (v1.11.6) + * zstd: Make decoder allocations smaller [#306](https://github.com/klauspost/compress/pull/306) + * zstd: Free Decoder resources when Reset is called with a nil io.Reader [#305](https://github.com/klauspost/compress/pull/305) + +* Dec 20, 2020 (v1.11.4) + * zstd: Add Best compression mode [#304](https://github.com/klauspost/compress/pull/304) + * Add header decoder [#299](https://github.com/klauspost/compress/pull/299) + * s2: Add uncompressed stream option [#297](https://github.com/klauspost/compress/pull/297) + * Simplify/speed up small blocks with known max size. [#300](https://github.com/klauspost/compress/pull/300) + * zstd: Always reset literal dict encoder [#303](https://github.com/klauspost/compress/pull/303) + +* Nov 15, 2020 (v1.11.3) + * inflate: 10-15% faster decompression [#293](https://github.com/klauspost/compress/pull/293) + * zstd: Tweak DecodeAll default allocation [#295](https://github.com/klauspost/compress/pull/295) + +* Oct 11, 2020 (v1.11.2) + * s2: Fix out of bounds read in "better" block compression [#291](https://github.com/klauspost/compress/pull/291) + +* Oct 1, 2020 (v1.11.1) + * zstd: Set allLitEntropy true in default configuration [#286](https://github.com/klauspost/compress/pull/286) + +* Sept 8, 2020 (v1.11.0) + * zstd: Add experimental compression [dictionaries](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) [#281](https://github.com/klauspost/compress/pull/281) + * zstd: Fix mixed Write and ReadFrom calls [#282](https://github.com/klauspost/compress/pull/282) + * inflate/gz: Limit variable shifts, ~5% faster decompression [#274](https://github.com/klauspost/compress/pull/274) +
+ +
+ See changes to v1.10.x + +* July 8, 2020 (v1.10.11) + * zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278) + * huff0: Also populate compression table when reading decoding table. [#275](https://github.com/klauspost/compress/pull/275) + +* June 23, 2020 (v1.10.10) + * zstd: Skip entropy compression in fastest mode when no matches. [#270](https://github.com/klauspost/compress/pull/270) + +* June 16, 2020 (v1.10.9): + * zstd: API change for specifying dictionaries. See [#268](https://github.com/klauspost/compress/pull/268) + * zip: update CreateHeaderRaw to handle zip64 fields. [#266](https://github.com/klauspost/compress/pull/266) + * Fuzzit tests removed. The service has been purchased and is no longer available. + +* June 5, 2020 (v1.10.8): + * 1.15x faster zstd block decompression. [#265](https://github.com/klauspost/compress/pull/265) + +* June 1, 2020 (v1.10.7): + * Added zstd decompression [dictionary support](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) + * Increase zstd decompression speed up to 1.19x. [#259](https://github.com/klauspost/compress/pull/259) + * Remove internal reset call in zstd compression and reduce allocations. [#263](https://github.com/klauspost/compress/pull/263) + +* May 21, 2020: (v1.10.6) + * zstd: Reduce allocations while decoding. [#258](https://github.com/klauspost/compress/pull/258), [#252](https://github.com/klauspost/compress/pull/252) + * zstd: Stricter decompression checks. + +* April 12, 2020: (v1.10.5) + * s2-commands: Flush output when receiving SIGINT. [#239](https://github.com/klauspost/compress/pull/239) + +* Apr 8, 2020: (v1.10.4) + * zstd: Minor/special case optimizations. [#251](https://github.com/klauspost/compress/pull/251), [#250](https://github.com/klauspost/compress/pull/250), [#249](https://github.com/klauspost/compress/pull/249), [#247](https://github.com/klauspost/compress/pull/247) +* Mar 11, 2020: (v1.10.3) + * s2: Use S2 encoder in pure Go mode for Snappy output as well. [#245](https://github.com/klauspost/compress/pull/245) + * s2: Fix pure Go block encoder. [#244](https://github.com/klauspost/compress/pull/244) + * zstd: Added "better compression" mode. [#240](https://github.com/klauspost/compress/pull/240) + * zstd: Improve speed of fastest compression mode by 5-10% [#241](https://github.com/klauspost/compress/pull/241) + * zstd: Skip creating encoders when not needed. [#238](https://github.com/klauspost/compress/pull/238) + +* Feb 27, 2020: (v1.10.2) + * Close to 50% speedup in inflate (gzip/zip decompression). [#236](https://github.com/klauspost/compress/pull/236) [#234](https://github.com/klauspost/compress/pull/234) [#232](https://github.com/klauspost/compress/pull/232) + * Reduce deflate level 1-6 memory usage up to 59%. [#227](https://github.com/klauspost/compress/pull/227) + +* Feb 18, 2020: (v1.10.1) + * Fix zstd crash when resetting multiple times without sending data. [#226](https://github.com/klauspost/compress/pull/226) + * deflate: Fix dictionary use on level 1-6. [#224](https://github.com/klauspost/compress/pull/224) + * Remove deflate writer reference when closing. [#224](https://github.com/klauspost/compress/pull/224) + +* Feb 4, 2020: (v1.10.0) + * Add optional dictionary to [stateless deflate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc#StatelessDeflate). Breaking change, send `nil` for previous behaviour. [#216](https://github.com/klauspost/compress/pull/216) + * Fix buffer overflow on repeated small block deflate. [#218](https://github.com/klauspost/compress/pull/218) + * Allow copying content from an existing ZIP file without decompressing+compressing. [#214](https://github.com/klauspost/compress/pull/214) + * Added [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) AMD64 assembler and various optimizations. Stream speed >10GB/s. [#186](https://github.com/klauspost/compress/pull/186) + +
+ +
+ See changes prior to v1.10.0 + +* Jan 20,2020 (v1.9.8) Optimize gzip/deflate with better size estimates and faster table generation. [#207](https://github.com/klauspost/compress/pull/207) by [luyu6056](https://github.com/luyu6056), [#206](https://github.com/klauspost/compress/pull/206). +* Jan 11, 2020: S2 Encode/Decode will use provided buffer if capacity is big enough. [#204](https://github.com/klauspost/compress/pull/204) +* Jan 5, 2020: (v1.9.7) Fix another zstd regression in v1.9.5 - v1.9.6 removed. +* Jan 4, 2020: (v1.9.6) Regression in v1.9.5 fixed causing corrupt zstd encodes in rare cases. +* Jan 4, 2020: Faster IO in [s2c + s2d commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) compression/decompression. [#192](https://github.com/klauspost/compress/pull/192) +* Dec 29, 2019: Removed v1.9.5 since fuzz tests showed a compatibility problem with the reference zstandard decoder. +* Dec 29, 2019: (v1.9.5) zstd: 10-20% faster block compression. [#199](https://github.com/klauspost/compress/pull/199) +* Dec 29, 2019: [zip](https://godoc.org/github.com/klauspost/compress/zip) package updated with latest Go features +* Dec 29, 2019: zstd: Single segment flag condintions tweaked. [#197](https://github.com/klauspost/compress/pull/197) +* Dec 18, 2019: s2: Faster compression when ReadFrom is used. [#198](https://github.com/klauspost/compress/pull/198) +* Dec 10, 2019: s2: Fix repeat length output when just above at 16MB limit. +* Dec 10, 2019: zstd: Add function to get decoder as io.ReadCloser. [#191](https://github.com/klauspost/compress/pull/191) +* Dec 3, 2019: (v1.9.4) S2: limit max repeat length. [#188](https://github.com/klauspost/compress/pull/188) +* Dec 3, 2019: Add [WithNoEntropyCompression](https://godoc.org/github.com/klauspost/compress/zstd#WithNoEntropyCompression) to zstd [#187](https://github.com/klauspost/compress/pull/187) +* Dec 3, 2019: Reduce memory use for tests. Check for leaked goroutines. +* Nov 28, 2019 (v1.9.3) Less allocations in stateless deflate. +* Nov 28, 2019: 5-20% Faster huff0 decode. Impacts zstd as well. [#184](https://github.com/klauspost/compress/pull/184) +* Nov 12, 2019 (v1.9.2) Added [Stateless Compression](#stateless-compression) for gzip/deflate. +* Nov 12, 2019: Fixed zstd decompression of large single blocks. [#180](https://github.com/klauspost/compress/pull/180) +* Nov 11, 2019: Set default [s2c](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) block size to 4MB. +* Nov 11, 2019: Reduce inflate memory use by 1KB. +* Nov 10, 2019: Less allocations in deflate bit writer. +* Nov 10, 2019: Fix inconsistent error returned by zstd decoder. +* Oct 28, 2019 (v1.9.1) ztsd: Fix crash when compressing blocks. [#174](https://github.com/klauspost/compress/pull/174) +* Oct 24, 2019 (v1.9.0) zstd: Fix rare data corruption [#173](https://github.com/klauspost/compress/pull/173) +* Oct 24, 2019 zstd: Fix huff0 out of buffer write [#171](https://github.com/klauspost/compress/pull/171) and always return errors [#172](https://github.com/klauspost/compress/pull/172) +* Oct 10, 2019: Big deflate rewrite, 30-40% faster with better compression [#105](https://github.com/klauspost/compress/pull/105) + +
+ +
+ See changes prior to v1.9.0 + +* Oct 10, 2019: (v1.8.6) zstd: Allow partial reads to get flushed data. [#169](https://github.com/klauspost/compress/pull/169) +* Oct 3, 2019: Fix inconsistent results on broken zstd streams. +* Sep 25, 2019: Added `-rm` (remove source files) and `-q` (no output except errors) to `s2c` and `s2d` [commands](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) +* Sep 16, 2019: (v1.8.4) Add `s2c` and `s2d` [commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools). +* Sep 10, 2019: (v1.8.3) Fix s2 decoder [Skip](https://godoc.org/github.com/klauspost/compress/s2#Reader.Skip). +* Sep 7, 2019: zstd: Added [WithWindowSize](https://godoc.org/github.com/klauspost/compress/zstd#WithWindowSize), contributed by [ianwilkes](https://github.com/ianwilkes). +* Sep 5, 2019: (v1.8.2) Add [WithZeroFrames](https://godoc.org/github.com/klauspost/compress/zstd#WithZeroFrames) which adds full zero payload block encoding option. +* Sep 5, 2019: Lazy initialization of zstandard predefined en/decoder tables. +* Aug 26, 2019: (v1.8.1) S2: 1-2% compression increase in "better" compression mode. +* Aug 26, 2019: zstd: Check maximum size of Huffman 1X compressed literals while decoding. +* Aug 24, 2019: (v1.8.0) Added [S2 compression](https://github.com/klauspost/compress/tree/master/s2#s2-compression), a high performance replacement for Snappy. +* Aug 21, 2019: (v1.7.6) Fixed minor issues found by fuzzer. One could lead to zstd not decompressing. +* Aug 18, 2019: Add [fuzzit](https://fuzzit.dev/) continuous fuzzing. +* Aug 14, 2019: zstd: Skip incompressible data 2x faster. [#147](https://github.com/klauspost/compress/pull/147) +* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146) +* Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144) +* Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142) +* July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder. +* July 15, 2019 (v1.7.3): Minor speedup/compression increase in default zstd encoder. +* July 14, 2019: zstd decoder: Fix decompression error on multiple uses with mixed content. +* July 7, 2019 (v1.7.2): Snappy update, zstd decoder potential race fix. +* June 17, 2019: zstd decompression bugfix. +* June 17, 2019: fix 32 bit builds. +* June 17, 2019: Easier use in modules (less dependencies). +* June 9, 2019: New stronger "default" [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression mode. Matches zstd default compression ratio. +* June 5, 2019: 20-40% throughput in [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and better compression. +* June 5, 2019: deflate/gzip compression: Reduce memory usage of lower compression levels. +* June 2, 2019: Added [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression! +* May 25, 2019: deflate/gzip: 10% faster bit writer, mostly visible in lower levels. +* Apr 22, 2019: [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) decompression added. +* Aug 1, 2018: Added [huff0 README](https://github.com/klauspost/compress/tree/master/huff0#huff0-entropy-compression). +* Jul 8, 2018: Added [Performance Update 2018](#performance-update-2018) below. +* Jun 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0). +* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change. +* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change. +* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function. +* May 28, 2017: Reduce allocations when resetting decoder. +* Apr 02, 2017: Change back to official crc32, since changes were merged in Go 1.7. +* Jan 14, 2017: Reduce stack pressure due to array copies. See [Issue #18625](https://github.com/golang/go/issues/18625). +* Oct 25, 2016: Level 2-4 have been rewritten and now offers significantly better performance than before. +* Oct 20, 2016: Port zlib changes from Go 1.7 to fix zlib writer issue. Please update. +* Oct 16, 2016: Go 1.7 changes merged. Apples to apples this package is a few percent faster, but has a significantly better balance between speed and compression per level. +* Mar 24, 2016: Always attempt Huffman encoding on level 4-7. This improves base 64 encoded data compression. +* Mar 24, 2016: Small speedup for level 1-3. +* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. +* Feb 19, 2016: Handle small payloads faster in level 1-3. +* Feb 19, 2016: Added faster level 2 + 3 compression modes. +* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5. +* Feb 14, 2016: Snappy: Merge upstream changes. +* Feb 14, 2016: Snappy: Fix aggressive skipping. +* Feb 14, 2016: Snappy: Update benchmark. +* Feb 13, 2016: Deflate: Fixed assembler problem that could lead to sub-optimal compression. +* Feb 12, 2016: Snappy: Added AMD64 SSE 4.2 optimizations to matching, which makes easy to compress material run faster. Typical speedup is around 25%. +* Feb 9, 2016: Added Snappy package fork. This version is 5-7% faster, much more on hard to compress content. +* Jan 30, 2016: Optimize level 1 to 3 by not considering static dictionary or storing uncompressed. ~4-5% speedup. +* Jan 16, 2016: Optimization on deflate level 1,2,3 compression. +* Jan 8 2016: Merge [CL 18317](https://go-review.googlesource.com/#/c/18317): fix reading, writing of zip64 archives. +* Dec 8 2015: Make level 1 and -2 deterministic even if write size differs. +* Dec 8 2015: Split encoding functions, so hashing and matching can potentially be inlined. 1-3% faster on AMD64. 5% faster on other platforms. +* Dec 8 2015: Fixed rare [one byte out-of bounds read](https://github.com/klauspost/compress/issues/20). Please update! +* Nov 23 2015: Optimization on token writer. ~2-4% faster. Contributed by [@dsnet](https://github.com/dsnet). +* Nov 20 2015: Small optimization to bit writer on 64 bit systems. +* Nov 17 2015: Fixed out-of-bound errors if the underlying Writer returned an error. See [#15](https://github.com/klauspost/compress/issues/15). +* Nov 12 2015: Added [io.WriterTo](https://golang.org/pkg/io/#WriterTo) support to gzip/inflate. +* Nov 11 2015: Merged [CL 16669](https://go-review.googlesource.com/#/c/16669/4): archive/zip: enable overriding (de)compressors per file +* Oct 15 2015: Added skipping on uncompressible data. Random data speed up >5x. + +
+ +# deflate usage + +The packages are drop-in replacements for standard libraries. Simply replace the import path to use them: + +| old import | new import | Documentation +|--------------------|-----------------------------------------|--------------------| +| `compress/gzip` | `github.com/klauspost/compress/gzip` | [gzip](https://pkg.go.dev/github.com/klauspost/compress/gzip?tab=doc) +| `compress/zlib` | `github.com/klauspost/compress/zlib` | [zlib](https://pkg.go.dev/github.com/klauspost/compress/zlib?tab=doc) +| `archive/zip` | `github.com/klauspost/compress/zip` | [zip](https://pkg.go.dev/github.com/klauspost/compress/zip?tab=doc) +| `compress/flate` | `github.com/klauspost/compress/flate` | [flate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc) + +* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib). + +You may also be interested in [pgzip](https://github.com/klauspost/pgzip), which is a drop in replacement for gzip, which support multithreaded compression on big files and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages. + +The packages contains the same as the standard library, so you can use the godoc for that: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/), [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/). + +Currently there is only minor speedup on decompression (mostly CRC32 calculation). + +Memory usage is typically 1MB for a Writer. stdlib is in the same range. +If you expect to have a lot of concurrently allocated Writers consider using +the stateless compress described below. + +For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). + +# Stateless compression + +This package offers stateless compression as a special option for gzip/deflate. +It will do compression but without maintaining any state between Write calls. + +This means there will be no memory kept between Write calls, but compression and speed will be suboptimal. + +This is only relevant in cases where you expect to run many thousands of compressors concurrently, +but with very little activity. This is *not* intended for regular web servers serving individual requests. + +Because of this, the size of actual Write calls will affect output size. + +In gzip, specify level `-3` / `gzip.StatelessCompression` to enable. + +For direct deflate use, NewStatelessWriter and StatelessDeflate are available. See [documentation](https://godoc.org/github.com/klauspost/compress/flate#NewStatelessWriter) + +A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer: + +``` + // replace 'ioutil.Discard' with your output. + gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression) + if err != nil { + return err + } + defer gzw.Close() + + w := bufio.NewWriterSize(gzw, 4096) + defer w.Flush() + + // Write to 'w' +``` + +This will only use up to 4KB in memory when the writer is idle. + +Compression is almost always worse than the fastest compression level +and each write will allocate (a little) memory. + +# Performance Update 2018 + +It has been a while since we have been looking at the speed of this package compared to the standard library, so I thought I would re-do my tests and give some overall recommendations based on the current state. All benchmarks have been performed with Go 1.10 on my Desktop Intel(R) Core(TM) i7-2600 CPU @3.40GHz. Since I last ran the tests, I have gotten more RAM, which means tests with big files are no longer limited by my SSD. + +The raw results are in my [updated spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). Due to cgo changes and upstream updates i could not get the cgo version of gzip to compile. Instead I included the [zstd](https://github.com/datadog/zstd) cgo implementation. If I get cgo gzip to work again, I might replace the results in the sheet. + +The columns to take note of are: *MB/s* - the throughput. *Reduction* - the data size reduction in percent of the original. *Rel Speed* relative speed compared to the standard library at the same level. *Smaller* - how many percent smaller is the compressed output compared to stdlib. Negative means the output was bigger. *Loss* means the loss (or gain) in compression as a percentage difference of the input. + +The `gzstd` (standard library gzip) and `gzkp` (this package gzip) only uses one CPU core. [`pgzip`](https://github.com/klauspost/pgzip), [`bgzf`](https://github.com/biogo/hts/tree/master/bgzf) uses all 4 cores. [`zstd`](https://github.com/DataDog/zstd) uses one core, and is a beast (but not Go, yet). + + +## Overall differences. + +There appears to be a roughly 5-10% speed advantage over the standard library when comparing at similar compression levels. + +The biggest difference you will see is the result of [re-balancing](https://blog.klauspost.com/rebalancing-deflate-compression-levels/) the compression levels. I wanted by library to give a smoother transition between the compression levels than the standard library. + +This package attempts to provide a more smooth transition, where "1" is taking a lot of shortcuts, "5" is the reasonable trade-off and "9" is the "give me the best compression", and the values in between gives something reasonable in between. The standard library has big differences in levels 1-4, but levels 5-9 having no significant gains - often spending a lot more time than can be justified by the achieved compression. + +There are links to all the test data in the [spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) in the top left field on each tab. + +## Web Content + +This test set aims to emulate typical use in a web server. The test-set is 4GB data in 53k files, and is a mixture of (mostly) HTML, JS, CSS. + +Since level 1 and 9 are close to being the same code, they are quite close. But looking at the levels in-between the differences are quite big. + +Looking at level 6, this package is 88% faster, but will output about 6% more data. For a web server, this means you can serve 88% more data, but have to pay for 6% more bandwidth. You can draw your own conclusions on what would be the most expensive for your case. + +## Object files + +This test is for typical data files stored on a server. In this case it is a collection of Go precompiled objects. They are very compressible. + +The picture is similar to the web content, but with small differences since this is very compressible. Levels 2-3 offer good speed, but is sacrificing quite a bit of compression. + +The standard library seems suboptimal on level 3 and 4 - offering both worse compression and speed than level 6 & 7 of this package respectively. + +## Highly Compressible File + +This is a JSON file with very high redundancy. The reduction starts at 95% on level 1, so in real life terms we are dealing with something like a highly redundant stream of data, etc. + +It is definitely visible that we are dealing with specialized content here, so the results are very scattered. This package does not do very well at levels 1-4, but picks up significantly at level 5 and levels 7 and 8 offering great speed for the achieved compression. + +So if you know you content is extremely compressible you might want to go slightly higher than the defaults. The standard library has a huge gap between levels 3 and 4 in terms of speed (2.75x slowdown), so it offers little "middle ground". + +## Medium-High Compressible + +This is a pretty common test corpus: [enwik9](http://mattmahoney.net/dc/textdata.html). It contains the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. This is a very good test of typical text based compression and more data heavy streams. + +We see a similar picture here as in "Web Content". On equal levels some compression is sacrificed for more speed. Level 5 seems to be the best trade-off between speed and size, beating stdlib level 3 in both. + +## Medium Compressible + +I will combine two test sets, one [10GB file set](http://mattmahoney.net/dc/10gb.html) and a VM disk image (~8GB). Both contain different data types and represent a typical backup scenario. + +The most notable thing is how quickly the standard library drops to very low compression speeds around level 5-6 without any big gains in compression. Since this type of data is fairly common, this does not seem like good behavior. + + +## Un-compressible Content + +This is mainly a test of how good the algorithms are at detecting un-compressible input. The standard library only offers this feature with very conservative settings at level 1. Obviously there is no reason for the algorithms to try to compress input that cannot be compressed. The only downside is that it might skip some compressible data on false detections. + + +## Huffman only compression + +This compression library adds a special compression level, named `HuffmanOnly`, which allows near linear time compression. This is done by completely disabling matching of previous data, and only reduce the number of bits to represent each character. + +This means that often used characters, like 'e' and ' ' (space) in text use the fewest bits to represent, and rare characters like '¤' takes more bits to represent. For more information see [wikipedia](https://en.wikipedia.org/wiki/Huffman_coding) or this nice [video](https://youtu.be/ZdooBTdW5bM). + +Since this type of compression has much less variance, the compression speed is mostly unaffected by the input data, and is usually more than *180MB/s* for a single core. + +The downside is that the compression ratio is usually considerably worse than even the fastest conventional compression. The compression ratio can never be better than 8:1 (12.5%). + +The linear time compression can be used as a "better than nothing" mode, where you cannot risk the encoder to slow down on some content. For comparison, the size of the "Twain" text is *233460 bytes* (+29% vs. level 1) and encode speed is 144MB/s (4.5x level 1). So in this case you trade a 30% size increase for a 4 times speedup. + +For more information see my blog post on [Fast Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/). + +This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip. + +# Other packages + +Here are other packages of good quality and pure Go (no cgo wrappers or autoconverted code): + +* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression. +* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression. +* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer. + +# license + +This code is licensed under the same conditions as the original Go code. See LICENSE file. diff --git a/vendor/github.com/klauspost/compress/compressible.go b/vendor/github.com/klauspost/compress/compressible.go new file mode 100644 index 0000000..ea5a692 --- /dev/null +++ b/vendor/github.com/klauspost/compress/compressible.go @@ -0,0 +1,85 @@ +package compress + +import "math" + +// Estimate returns a normalized compressibility estimate of block b. +// Values close to zero are likely uncompressible. +// Values above 0.1 are likely to be compressible. +// Values above 0.5 are very compressible. +// Very small lengths will return 0. +func Estimate(b []byte) float64 { + if len(b) < 16 { + return 0 + } + + // Correctly predicted order 1 + hits := 0 + lastMatch := false + var o1 [256]byte + var hist [256]int + c1 := byte(0) + for _, c := range b { + if c == o1[c1] { + // We only count a hit if there was two correct predictions in a row. + if lastMatch { + hits++ + } + lastMatch = true + } else { + lastMatch = false + } + o1[c1] = c + c1 = c + hist[c]++ + } + + // Use x^0.6 to give better spread + prediction := math.Pow(float64(hits)/float64(len(b)), 0.6) + + // Calculate histogram distribution + variance := float64(0) + avg := float64(len(b)) / 256 + + for _, v := range hist { + Δ := float64(v) - avg + variance += Δ * Δ + } + + stddev := math.Sqrt(float64(variance)) / float64(len(b)) + exp := math.Sqrt(1 / float64(len(b))) + + // Subtract expected stddev + stddev -= exp + if stddev < 0 { + stddev = 0 + } + stddev *= 1 + exp + + // Use x^0.4 to give better spread + entropy := math.Pow(stddev, 0.4) + + // 50/50 weight between prediction and histogram distribution + return math.Pow((prediction+entropy)/2, 0.9) +} + +// ShannonEntropyBits returns the number of bits minimum required to represent +// an entropy encoding of the input bytes. +// https://en.wiktionary.org/wiki/Shannon_entropy +func ShannonEntropyBits(b []byte) int { + if len(b) == 0 { + return 0 + } + var hist [256]int + for _, c := range b { + hist[c]++ + } + shannon := float64(0) + invTotal := 1.0 / float64(len(b)) + for _, v := range hist[:] { + if v > 0 { + n := float64(v) + shannon += math.Ceil(-math.Log2(n*invTotal) * n) + } + } + return int(math.Ceil(shannon)) +} diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 25dbe3e..bffa2f3 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -6,6 +6,7 @@ package flate import ( + "encoding/binary" "fmt" "io" "math" @@ -37,15 +38,17 @@ const ( maxMatchLength = 258 // The longest match for the compressor minOffsetSize = 1 // The shortest offset that makes any sense - // The maximum number of tokens we put into a single flat block, just too - // stop things from getting too large. - maxFlateBlockTokens = 1 << 14 + // The maximum number of tokens we will encode at the time. + // Smaller sizes usually creates less optimal blocks. + // Bigger can make context switching slow. + // We use this for levels 7-9, so we make it big. + maxFlateBlockTokens = 1 << 15 maxStoreBlockSize = 65535 hashBits = 17 // After 17 performance degrades hashSize = 1 << hashBits hashMask = (1 << hashBits) - 1 hashShift = (hashBits + minMatchLength - 1) / minMatchLength - maxHashOffset = 1 << 24 + maxHashOffset = 1 << 28 skipNever = math.MaxInt32 @@ -70,9 +73,9 @@ var levels = []compressionLevel{ {0, 0, 0, 0, 0, 6}, // Levels 7-9 use increasingly more lazy matching // and increasingly stringent conditions for "good enough". - {8, 8, 24, 16, skipNever, 7}, - {10, 16, 24, 64, skipNever, 8}, - {32, 258, 258, 4096, skipNever, 9}, + {8, 12, 16, 24, skipNever, 7}, + {16, 30, 40, 64, skipNever, 8}, + {32, 258, 258, 1024, skipNever, 9}, } // advancedState contains state for the advanced levels, with bigger hash tables, etc. @@ -93,8 +96,9 @@ type advancedState struct { hashOffset int // input window: unprocessed data is window[index:windowEnd] - index int - hashMatch [maxMatchLength + minMatchLength]uint32 + index int + estBitsPerByte int + hashMatch [maxMatchLength + minMatchLength]uint32 hash uint32 ii uint16 // position of last match, intended to overflow to reset. @@ -103,6 +107,7 @@ type advancedState struct { type compressor struct { compressionLevel + h *huffmanEncoder w *huffmanBitWriter // compression algorithm @@ -170,7 +175,8 @@ func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { window = d.window[d.blockStart:index] } d.blockStart = index - d.w.writeBlock(tok, eof, window) + //d.w.writeBlock(tok, eof, window) + d.w.writeBlockDynamic(tok, eof, window, d.sync) return d.w.err } return nil @@ -263,7 +269,7 @@ func (d *compressor) fillWindow(b []byte) { // Try to find a match starting at index whose length is greater than prevSize. // We only look at chainCount possibilities before giving up. // pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead -func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { +func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { minMatchLook := maxMatchLength if lookahead < minMatchLook { minMatchLook = lookahead @@ -279,36 +285,75 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead // If we've got a match that's good enough, only look in 1/4 the chain. tries := d.chain - length = prevLength - if length >= d.good { - tries >>= 2 - } + length = minMatchLength - 1 wEnd := win[pos+length] wPos := win[pos:] minIndex := pos - windowSize + if minIndex < 0 { + minIndex = 0 + } + offset = 0 + + cGain := 0 + if d.chain < 100 { + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return + } + // Some like it higher (CSV), some like it lower (JSON) + const baseCost = 6 + // Base is 4 bytes at with an additional cost. + // Matches must be better than this. for i := prevHead; tries > 0; tries-- { if wEnd == win[i+length] { n := matchLen(win[i:i+minMatchLook], wPos) - - if n > length && (n > minMatchLength || pos-i <= 4096) { - length = n - offset = pos - i - ok = true - if n >= nice { - // The match is good enough that we don't try to find a better one. - break + if n > length { + // Calculate gain. Estimate + newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) + + //fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n])) + if newGain > cGain { + length = n + offset = pos - i + cGain = newGain + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] } - wEnd = win[pos+n] } } - if i == minIndex { + if i <= minIndex { // hashPrev[i & windowMask] has already been overwritten, so stop now. break } i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset - if i < minIndex || i < 0 { + if i < minIndex { break } } @@ -327,8 +372,7 @@ func (d *compressor) writeStoredBlock(buf []byte) error { // of the supplied slice. // The caller must ensure that len(b) >= 4. func hash4(b []byte) uint32 { - b = b[:4] - return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits) + return hash4u(binary.LittleEndian.Uint32(b), hashBits) } // bulkHash4 will compute hashes using the same @@ -337,11 +381,12 @@ func bulkHash4(b []byte, dst []uint32) { if len(b) < 4 { return } - hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24 + hb := binary.LittleEndian.Uint32(b) + dst[0] = hash4u(hb, hashBits) end := len(b) - 4 + 1 for i := 1; i < end; i++ { - hb = (hb << 8) | uint32(b[i+3]) + hb = (hb >> 8) | uint32(b[i+3])<<24 dst[i] = hash4u(hb, hashBits) } } @@ -374,10 +419,21 @@ func (d *compressor) deflateLazy() { if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { return } + if d.windowEnd != s.index && d.chain > 100 { + // Get literal huffman coder. + if d.h == nil { + d.h = newHuffmanEncoder(maxFlateBlockTokens) + } + var tmp [256]uint16 + for _, v := range d.window[s.index:d.windowEnd] { + tmp[v]++ + } + d.h.generate(tmp[:], 15) + } s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) if s.index < s.maxInsertIndex { - s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + s.hash = hash4(d.window[s.index:]) } for { @@ -410,7 +466,7 @@ func (d *compressor) deflateLazy() { } if s.index < s.maxInsertIndex { // Update the hash - s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + s.hash = hash4(d.window[s.index:]) ch := s.hashHead[s.hash&hashMask] s.chainHead = int(ch) s.hashPrev[s.index&windowMask] = ch @@ -426,12 +482,37 @@ func (d *compressor) deflateLazy() { } if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { - if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { s.length = newLength s.offset = newOffset } } + if prevLength >= minMatchLength && s.length <= prevLength { + // Check for better match at end... + // + // checkOff must be >=2 since we otherwise risk checking s.index + // Offset of 2 seems to yield best results. + const checkOff = 2 + prevIndex := s.index - 1 + if prevIndex+prevLength+checkOff < s.maxInsertIndex { + end := lookahead + if lookahead > maxMatchLength { + end = maxMatchLength + } + end += prevIndex + idx := prevIndex + prevLength - (4 - checkOff) + h := hash4(d.window[idx:]) + ch2 := int(s.hashHead[h&hashMask]) - s.hashOffset - prevLength + (4 - checkOff) + if ch2 > minIndex { + length := matchLen(d.window[prevIndex:end], d.window[ch2:]) + // It seems like a pure length metric is best. + if length > prevLength { + prevLength = length + prevOffset = prevIndex - ch2 + } + } + } // There was a match at the previous step, and the current match is // not better. Output the previous match. d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) @@ -440,8 +521,7 @@ func (d *compressor) deflateLazy() { // index and index-1 are already inserted. If there is not enough // lookahead, the last two strings are not inserted into the hash // table. - var newIndex int - newIndex = s.index + prevLength - 1 + newIndex := s.index + prevLength - 1 // Calculate missing hashes end := newIndex if end > s.maxInsertIndex { @@ -480,6 +560,7 @@ func (d *compressor) deflateLazy() { } d.tokens.Reset() } + s.ii = 0 } else { // Reset, if we got a match this run. if s.length >= minMatchLength { @@ -499,13 +580,12 @@ func (d *compressor) deflateLazy() { // If we have a long run of no matches, skip additional bytes // Resets when s.ii overflows after 64KB. - if s.ii > 31 { - n := int(s.ii >> 5) + if n := int(s.ii) - d.chain; n > 0 { + n = 1 + int(n>>6) for j := 0; j < n; j++ { if s.index >= d.windowEnd-1 { break } - d.tokens.AddLiteral(d.window[s.index-1]) if d.tokens.n == maxFlateBlockTokens { if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { @@ -513,6 +593,14 @@ func (d *compressor) deflateLazy() { } d.tokens.Reset() } + // Index... + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } s.index++ } // Flush last byte @@ -612,7 +700,9 @@ func (d *compressor) write(b []byte) (n int, err error) { } n = len(b) for len(b) > 0 { - d.step(d) + if d.windowEnd == len(d.window) || d.sync { + d.step(d) + } b = b[d.fill(d, b):] if d.err != nil { return 0, d.err @@ -645,21 +735,21 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.fill = (*compressor).fillBlock d.step = (*compressor).store case level == ConstantCompression: - d.w.logNewTablePenalty = 4 - d.window = make([]byte, maxStoreBlockSize) + d.w.logNewTablePenalty = 10 + d.window = make([]byte, 32<<10) d.fill = (*compressor).fillBlock d.step = (*compressor).storeHuff case level == DefaultCompression: level = 5 fallthrough case level >= 1 && level <= 6: - d.w.logNewTablePenalty = 6 + d.w.logNewTablePenalty = 7 d.fast = newFastEnc(level) d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock d.step = (*compressor).storeFast case 7 <= level && level <= 9: - d.w.logNewTablePenalty = 10 + d.w.logNewTablePenalty = 8 d.state = &advancedState{} d.compressionLevel = levels[level] d.initDeflate() diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index 6d4c1e9..d55ea2a 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -6,6 +6,7 @@ package flate import ( + "encoding/binary" "fmt" "math/bits" ) @@ -44,7 +45,7 @@ const ( bTableBits = 17 // Bits used in the big tables bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history. + allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. ) @@ -65,26 +66,15 @@ func load32(b []byte, i int) uint32 { } func load64(b []byte, i int) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func load3232(b []byte, i int32) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return binary.LittleEndian.Uint32(b[i:]) } func load6432(b []byte, i int32) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func hash(u uint32) uint32 { @@ -127,7 +117,7 @@ func (e *fastGen) addBlock(src []byte) int32 { // hash4 returns the hash of u to fit in a hash table with h bits. // Preferably h should be a constant and should always be <32. func hash4u(u uint32, h uint8) uint32 { - return (u * prime4bytes) >> ((32 - h) & 31) + return (u * prime4bytes) >> ((32 - h) & reg8SizeMask32) } type tableEntryPrev struct { @@ -138,25 +128,25 @@ type tableEntryPrev struct { // hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. // Preferably h should be a constant and should always be <32. func hash4x64(u uint64, h uint8) uint32 { - return (uint32(u) * prime4bytes) >> ((32 - h) & 31) + return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32) } // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. // Preferably h should be a constant and should always be <64. func hash7(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) + return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) } // hash8 returns the hash of u to fit in a hash table with h bits. // Preferably h should be a constant and should always be <64. func hash8(u uint64, h uint8) uint32 { - return uint32((u * prime8bytes) >> ((64 - h) & 63)) + return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64)) } // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. // Preferably h should be a constant and should always be <64. func hash6(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) + return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64)) } // matchlen will return the match length between offsets and t in src. @@ -189,7 +179,7 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 { // matchlenLong will return the match length between offsets and t in src. // It is assumed that s > t, that t >=0 and s < len(src). func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { - if debugDecode { + if debugDeflate { if t >= s { panic(fmt.Sprint("t >=s:", t, s)) } @@ -223,31 +213,20 @@ func (e *fastGen) Reset() { // matchLen returns the maximum length. // 'a' must be the shortest of the two. func matchLen(a, b []byte) int { - b = b[:len(a)] var checked int - if len(a) > 4 { - // Try 4 bytes first - if diff := load32(a, 0) ^ load32(b, 0); diff != 0 { - return bits.TrailingZeros32(diff) >> 3 - } - // Switch to 8 byte matching. - checked = 4 - a = a[4:] - b = b[4:] - for len(a) >= 8 { - b = b[:len(a)] - if diff := load64(a, 0) ^ load64(b, 0); diff != 0 { - return checked + (bits.TrailingZeros64(diff) >> 3) - } - checked += 8 - a = a[8:] - b = b[8:] + + for len(a) >= 8 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + return checked + (bits.TrailingZeros64(diff) >> 3) } + checked += 8 + a = a[8:] + b = b[8:] } b = b[:len(a)] for i := range a { if a[i] != b[i] { - return int(i) + checked + return i + checked } } return len(a) + checked diff --git a/vendor/github.com/klauspost/compress/flate/gen_inflate.go b/vendor/github.com/klauspost/compress/flate/gen_inflate.go deleted file mode 100644 index c74a95f..0000000 --- a/vendor/github.com/klauspost/compress/flate/gen_inflate.go +++ /dev/null @@ -1,274 +0,0 @@ -// +build generate - -//go:generate go run $GOFILE && gofmt -w inflate_gen.go - -package main - -import ( - "os" - "strings" -) - -func main() { - f, err := os.Create("inflate_gen.go") - if err != nil { - panic(err) - } - defer f.Close() - types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"} - names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"} - imports := []string{"bytes", "bufio", "io", "strings", "math/bits"} - f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT. - -package flate - -import ( -`) - - for _, imp := range imports { - f.WriteString("\t\"" + imp + "\"\n") - } - f.WriteString(")\n\n") - - template := ` - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) $FUNCNAME$() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.($TYPE$) - moreBits := func() error { - c, err := fr.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil - } - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := fr.ReadByte() - if err != nil { - f.b = b - f.nb = nb - f.err = noEOF(err) - return - } - f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - f.b = b >> (n & 31) - f.nb = nb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var n uint // number of bits extra - var length int - var err error - switch { - case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).$FUNCNAME$ - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - } - length += int(f.b & uint32(1<>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - if debugDecode { - fmt.Println("huffsym:", err) - } - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits f.nb>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { - if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).$FUNCNAME$ // We need to continue this work - f.stepState = stateDict - return - } - goto readLiteral - } -} - -` - for i, t := range types { - s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1) - s = strings.Replace(s, "$TYPE$", t, -1) - f.WriteString(s) - } - f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n") - f.WriteString("\tswitch f.r.(type) {\n") - for i, t := range types { - f.WriteString("\t\tcase " + t + ":\n") - f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n") - } - f.WriteString("\t\tdefault:\n") - f.WriteString("\t\t\treturn f.huffmanBlockGeneric") - f.WriteString("\t}\n}\n") -} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go index 53fe1d0..25f6d11 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -5,7 +5,10 @@ package flate import ( + "encoding/binary" + "fmt" "io" + "math" ) const ( @@ -22,11 +25,15 @@ const ( codegenCodeCount = 19 badCode = 255 + // maxPredefinedTokens is the maximum number of tokens + // where we check if fixed size is smaller. + maxPredefinedTokens = 250 + // bufferFlushSize indicates the buffer size // after which bytes are flushed to the writer. // Should preferably be a multiple of 6, since // we accumulate 6 bytes between writes to the buffer. - bufferFlushSize = 240 + bufferFlushSize = 246 // bufferSize is the actual output byte buffer size. // It must have additional headroom for a flush @@ -34,8 +41,11 @@ const ( bufferSize = bufferFlushSize + 8 ) +// Minimum length code that emits bits. +const lengthExtraBitsMinCode = 8 + // The number of extra bits needed by length code X - LENGTH_CODES_START. -var lengthExtraBits = [32]int8{ +var lengthExtraBits = [32]uint8{ /* 257 */ 0, 0, 0, /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, @@ -49,28 +59,41 @@ var lengthBase = [32]uint8{ 64, 80, 96, 112, 128, 160, 192, 224, 255, } +// Minimum offset code that emits bits. +const offsetExtraBitsMinCode = 4 + // offset code word extra bits. -var offsetExtraBits = [64]int8{ +var offsetExtraBits = [32]int8{ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, /* extended window */ - 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, + 14, 14, } -var offsetBase = [64]uint32{ - /* normal deflate */ - 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, - 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, - 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, - 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, - 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, - 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, +var offsetCombined = [32]uint32{} - /* extended window */ - 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, - 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, - 0x100000, 0x180000, 0x200000, 0x300000, +func init() { + var offsetBase = [32]uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, + } + + for i := range offsetCombined[:] { + // Don't use extended window values... + if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 { + continue + } + offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8) + } } // The odd order in which the codegen code sizes are written. @@ -85,17 +108,18 @@ type huffmanBitWriter struct { // Data waiting to be written is bytes[0:nbytes] // and then the low nbits of bits. bits uint64 - nbits uint16 + nbits uint8 nbytes uint8 + lastHuffMan bool literalEncoding *huffmanEncoder + tmpLitEncoding *huffmanEncoder offsetEncoding *huffmanEncoder codegenEncoding *huffmanEncoder err error lastHeader int // Set between 0 (reused block can be up to 2x the size) logNewTablePenalty uint - lastHuffMan bool - bytes [256]byte + bytes [256 + 8]byte literalFreq [lengthCodesStart + 32]uint16 offsetFreq [32]uint16 codegenFreq [codegenCodeCount]uint16 @@ -127,6 +151,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { return &huffmanBitWriter{ writer: w, literalEncoding: newHuffmanEncoder(literalCount), + tmpLitEncoding: newHuffmanEncoder(literalCount), codegenEncoding: newHuffmanEncoder(codegenCodeCount), offsetEncoding: newHuffmanEncoder(offsetCodeCount), } @@ -139,37 +164,33 @@ func (w *huffmanBitWriter) reset(writer io.Writer) { w.lastHuffMan = false } -func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) { - offsets, lits = true, true +func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { a := t.offHist[:offsetCodeCount] - b := w.offsetFreq[:len(a)] - for i := range a { - if b[i] == 0 && a[i] != 0 { - offsets = false - break + b := w.offsetEncoding.codes + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].len == 0 { + return false } } a = t.extraHist[:literalCount-256] - b = w.literalFreq[256:literalCount] + b = w.literalEncoding.codes[256:literalCount] b = b[:len(a)] - for i := range a { - if b[i] == 0 && a[i] != 0 { - lits = false - break + for i, v := range a { + if v != 0 && b[i].len == 0 { + return false } } - if lits { - a = t.litHist[:] - b = w.literalFreq[:len(a)] - for i := range a { - if b[i] == 0 && a[i] != 0 { - lits = false - break - } + + a = t.litHist[:256] + b = w.literalEncoding.codes[:len(a)] + for i, v := range a { + if v != 0 && b[i].len == 0 { + return false } } - return + return true } func (w *huffmanBitWriter) flush() { @@ -205,7 +226,7 @@ func (w *huffmanBitWriter) write(b []byte) { _, w.err = w.writer.Write(b) } -func (w *huffmanBitWriter) writeBits(b int32, nb uint16) { +func (w *huffmanBitWriter) writeBits(b int32, nb uint8) { w.bits |= uint64(b) << (w.nbits & 63) w.nbits += nb if w.nbits >= 48 { @@ -407,7 +428,7 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { func (w *huffmanBitWriter) writeCode(c hcode) { // The function does not get inlined if we "& 63" the shift. - w.bits |= uint64(c.code) << w.nbits + w.bits |= uint64(c.code) << (w.nbits & 63) w.nbits += c.len if w.nbits >= 48 { w.writeOutBits() @@ -420,13 +441,11 @@ func (w *huffmanBitWriter) writeOutBits() { w.bits >>= 48 w.nbits -= 48 n := w.nbytes - w.bytes[n] = byte(bits) - w.bytes[n+1] = byte(bits >> 8) - w.bytes[n+2] = byte(bits >> 16) - w.bytes[n+3] = byte(bits >> 24) - w.bytes[n+4] = byte(bits >> 32) - w.bytes[n+5] = byte(bits >> 40) + + // We over-write, but faster... + binary.LittleEndian.PutUint64(w.bytes[n:], bits) n += 6 + if n >= bufferFlushSize { if w.err != nil { n = 0 @@ -435,6 +454,7 @@ func (w *huffmanBitWriter) writeOutBits() { w.write(w.bytes[:n]) n = 0 } + w.nbytes = n } @@ -551,7 +571,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { w.lastHeader = 0 } numLiterals, numOffsets := w.indexTokens(tokens, false) - w.generate(tokens) + w.generate() var extraBits int storedSize, storable := w.storedSize(input) if storable { @@ -562,7 +582,10 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { // Fixed Huffman baseline. var literalEncoding = fixedLiteralEncoding var offsetEncoding = fixedOffsetEncoding - var size = w.fixedSize(extraBits) + var size = math.MaxInt32 + if tokens.n < maxPredefinedTokens { + size = w.fixedSize(extraBits) + } // Dynamic Huffman? var numCodegens int @@ -580,7 +603,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { } // Stored bytes? - if storable && storedSize < size { + if storable && storedSize <= size { w.writeStoredHeader(len(input), eof) w.writeBytes(input) return @@ -619,22 +642,39 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b w.lastHeader = 0 w.lastHuffMan = false } - if !sync { - tokens.Fill() + + // fillReuse enables filling of empty values. + // This will make encodings always reusable without testing. + // However, this does not appear to benefit on most cases. + const fillReuse = false + + // Check if we can reuse... + if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 } + numLiterals, numOffsets := w.indexTokens(tokens, !sync) + extraBits := 0 + ssize, storable := w.storedSize(input) + + const usePrefs = true + if storable || w.lastHeader > 0 { + extraBits = w.extraBitSize() + } var size int + // Check if we should reuse. if w.lastHeader > 0 { // Estimate size for using a new table. // Use the previous header size as the best estimate. newSize := w.lastHeader + tokens.EstimatedBits() - newSize += newSize >> w.logNewTablePenalty + newSize += int(w.literalEncoding.codes[endBlockMarker].len) + newSize>>w.logNewTablePenalty // The estimated size is calculated as an optimal table. // We add a penalty to make it more realistic and re-use a bit more. - reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize() + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits // Check if a new table is better. if newSize < reuseSize { @@ -645,35 +685,83 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b } else { size = reuseSize } + + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size { + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } // Check if we get a reasonable size decrease. - if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + if storable && ssize <= size { w.writeStoredHeader(len(input), eof) w.writeBytes(input) - w.lastHeader = 0 return } } // We want a new block/table if w.lastHeader == 0 { - w.generate(tokens) + if fillReuse && !sync { + w.fillTokens() + numLiterals, numOffsets = maxNumLit, maxNumDist + } else { + w.literalFreq[endBlockMarker] = 1 + } + + w.generate() // Generate codegen and codegenFrequencies, which indicates how to encode // the literalEncoding and the offsetEncoding. w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) w.codegenEncoding.generate(w.codegenFreq[:], 7) + var numCodegens int - size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize()) - // Store bytes, if we don't get a reasonable improvement. - if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + if fillReuse && !sync { + // Reindex for accurate size... + w.indexTokens(tokens, true) + } + size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + // Store predefined, if we don't get a reasonable improvement. + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size { + // Store bytes, if we don't get an improvement. + if storable && ssize <= preSize { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } + + if storable && ssize <= size { + // Store bytes, if we don't get an improvement. w.writeStoredHeader(len(input), eof) w.writeBytes(input) - w.lastHeader = 0 return } // Write Huffman table. w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - w.lastHeader, _ = w.headerSize() + if !sync { + w.lastHeader, _ = w.headerSize() + } w.lastHuffMan = false } @@ -684,6 +772,19 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) } +func (w *huffmanBitWriter) fillTokens() { + for i, v := range w.literalFreq[:literalCount] { + if v == 0 { + w.literalFreq[i] = 1 + } + } + for i, v := range w.offsetFreq[:offsetCodeCount] { + if v == 0 { + w.offsetFreq[i] = 1 + } + } +} + // indexTokens indexes a slice of tokens, and updates // literalFreq and offsetFreq, and generates literalEncoding // and offsetEncoding. @@ -718,7 +819,7 @@ func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, num return } -func (w *huffmanBitWriter) generate(t *tokens) { +func (w *huffmanBitWriter) generate() { w.literalEncoding.generate(w.literalFreq[:literalCount], 15) w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) } @@ -745,52 +846,135 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) offs := oeCodes[:32] lengths := leCodes[lengthCodesStart:] lengths = lengths[:32] + + // Go 1.16 LOVES having these on stack. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + for _, t := range tokens { - if t < matchType { - w.writeCode(lits[t.literal()]) + if t < 256 { + //w.writeCode(lits[t.literal()]) + c := lits[t] + bits |= uint64(c.code) << (nbits & 63) + nbits += c.len + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } continue } // Write the length length := t.length() - lengthCode := lengthCode(length) + lengthCode := lengthCode(length) & 31 if false { - w.writeCode(lengths[lengthCode&31]) + w.writeCode(lengths[lengthCode]) } else { // inlined - c := lengths[lengthCode&31] - w.bits |= uint64(c.code) << (w.nbits & 63) - w.nbits += c.len - if w.nbits >= 48 { - w.writeOutBits() + c := lengths[lengthCode] + bits |= uint64(c.code) << (nbits & 63) + nbits += c.len + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } } } - extraLengthBits := uint16(lengthExtraBits[lengthCode&31]) - if extraLengthBits > 0 { - extraLength := int32(length - lengthBase[lengthCode&31]) - w.writeBits(extraLength, extraLengthBits) + if lengthCode >= lengthExtraBitsMinCode { + extraLengthBits := lengthExtraBits[lengthCode] + //w.writeBits(extraLength, extraLengthBits) + extraLength := int32(length - lengthBase[lengthCode]) + bits |= uint64(extraLength) << (nbits & 63) + nbits += extraLengthBits + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } } // Write the offset offset := t.offset() - offsetCode := offsetCode(offset) + offsetCode := (offset >> 16) & 31 if false { - w.writeCode(offs[offsetCode&31]) + w.writeCode(offs[offsetCode]) } else { // inlined - c := offs[offsetCode&31] - w.bits |= uint64(c.code) << (w.nbits & 63) - w.nbits += c.len - if w.nbits >= 48 { - w.writeOutBits() + c := offs[offsetCode] + bits |= uint64(c.code) << (nbits & 63) + nbits += c.len + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } } } - extraOffsetBits := uint16(offsetExtraBits[offsetCode&63]) - if extraOffsetBits > 0 { - extraOffset := int32(offset - offsetBase[offsetCode&63]) - w.writeBits(extraOffset, extraOffsetBits) + + if offsetCode >= offsetExtraBitsMinCode { + offsetComb := offsetCombined[offsetCode] + //w.writeBits(extraOffset, extraOffsetBits) + bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63) + nbits += uint8(offsetComb) + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } } } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + if deferEOB { w.writeCode(leCodes[endBlockMarker]) } @@ -825,43 +1009,85 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { } } + // Fill is rarely better... + const fill = false + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + // Add everything as literals // We have to estimate the header size. // Assume header is around 70 bytes: // https://stackoverflow.com/a/25454430 const guessHeaderSizeBits = 70 * 8 - estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync) - estBits += w.lastHeader + 15 + histogram(input, w.literalFreq[:numLiterals], fill) + ssize, storable := w.storedSize(input) + if storable && len(input) > 1024 { + // Quick check for incompressible content. + abs := float64(0) + avg := float64(len(input)) / 256 + max := float64(len(input) * 2) + for _, v := range w.literalFreq[:256] { + diff := float64(v) - avg + abs += diff * diff + if abs > max { + break + } + } + if abs < max { + if debugDeflate { + fmt.Println("stored", abs, "<", max) + } + // No chance we can compress this... + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + } + w.literalFreq[endBlockMarker] = 1 + w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) + if fill { + // Clear fill... + for i := range w.literalFreq[:numLiterals] { + w.literalFreq[i] = 0 + } + histogram(input, w.literalFreq[:numLiterals], false) + } + estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) + estBits += w.lastHeader if w.lastHeader == 0 { estBits += guessHeaderSizeBits } estBits += estBits >> w.logNewTablePenalty // Store bytes, if we don't get a reasonable improvement. - ssize, storable := w.storedSize(input) - if storable && ssize < estBits { + if storable && ssize <= estBits { + if debugDeflate { + fmt.Println("stored,", ssize, "<=", estBits) + } w.writeStoredHeader(len(input), eof) w.writeBytes(input) return } if w.lastHeader > 0 { - reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256]) - estBits += estExtra + reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256]) if estBits < reuseSize { + if debugDeflate { + fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes") + } // We owe an EOB w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 + } else if debugDeflate { + fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) } } - const numLiterals = endBlockMarker + 1 - const numOffsets = 1 + count := 0 if w.lastHeader == 0 { - w.literalFreq[endBlockMarker] = 1 - w.literalEncoding.generate(w.literalFreq[:numLiterals], 15) - + // Use the temp encoding, so swap. + w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding // Generate codegen and codegenFrequencies, which indicates how to encode // the literalEncoding and the offsetEncoding. w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) @@ -872,39 +1098,93 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) w.lastHuffMan = true w.lastHeader, _ = w.headerSize() + if debugDeflate { + count += w.lastHeader + fmt.Println("header:", count/8) + } + } + + encoding := w.literalEncoding.codes[:256] + // Go 1.16 LOVES having these on stack. At least 1.5x the speed. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + + if debugDeflate { + count -= int(nbytes)*8 + int(nbits) + } + // Unroll, write 3 codes/loop. + // Fastest number of unrolls. + for len(input) > 3 { + // We must have at least 48 bits free. + if nbits >= 8 { + n := nbits >> 3 + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + bits >>= (n * 8) & 63 + nbits -= n * 8 + nbytes += n + } + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + a, b := encoding[input[0]], encoding[input[1]] + bits |= uint64(a.code) << (nbits & 63) + bits |= uint64(b.code) << ((nbits + a.len) & 63) + c := encoding[input[2]] + nbits += b.len + a.len + bits |= uint64(c.code) << (nbits & 63) + nbits += c.len + input = input[3:] } - encoding := w.literalEncoding.codes[:257] + // Remaining... for _, t := range input { - // Bitwriting inlined, ~30% speedup - c := encoding[t] - w.bits |= uint64(c.code) << ((w.nbits) & 63) - w.nbits += c.len - if w.nbits >= 48 { - bits := w.bits - w.bits >>= 48 - w.nbits -= 48 - n := w.nbytes - w.bytes[n] = byte(bits) - w.bytes[n+1] = byte(bits >> 8) - w.bytes[n+2] = byte(bits >> 16) - w.bytes[n+3] = byte(bits >> 24) - w.bytes[n+4] = byte(bits >> 32) - w.bytes[n+5] = byte(bits >> 40) - n += 6 - if n >= bufferFlushSize { + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { if w.err != nil { - n = 0 + nbytes = 0 return } - w.write(w.bytes[:n]) - n = 0 + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 } - w.nbytes = n + } + // Bitwriting inlined, ~30% speedup + c := encoding[t] + bits |= uint64(c.code) << (nbits & 63) + nbits += c.len + if debugDeflate { + count += int(c.len) } } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if debugDeflate { + nb := count + int(nbytes)*8 + int(nbits) + fmt.Println("wrote", nb, "bits,", nb/8, "bytes.") + } + // Flush if needed to have space. + if w.nbits >= 48 { + w.writeOutBits() + } + if eof || sync { - w.writeCode(encoding[endBlockMarker]) + w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 w.lastHuffMan = false } diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go index 4c39a30..9ab497c 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -17,13 +17,18 @@ const ( // hcode is a huffman code with a bit code and bit length. type hcode struct { - code, len uint16 + code uint16 + len uint8 } type huffmanEncoder struct { - codes []hcode - freqcache []literalNode - bitCount [17]int32 + codes []hcode + bitCount [17]int32 + + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + freqcache [literalCount + 1]literalNode } type literalNode struct { @@ -52,7 +57,7 @@ type levelInfo struct { } // set sets the code and length of an hcode. -func (h *hcode) set(code uint16, length uint16) { +func (h *hcode) set(code uint16, length uint8) { h.len = length h.code = code } @@ -76,7 +81,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder { var ch uint16 for ch = 0; ch < literalCount; ch++ { var bits uint16 - var size uint16 + var size uint8 switch { case ch < 144: // size 8, 000110000 .. 10111111 @@ -95,7 +100,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder { bits = ch + 192 - 280 size = 8 } - codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size} + codes[ch] = hcode{code: reverseBits(bits, size), len: size} } return h } @@ -122,6 +127,29 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int { return total } +func (h *huffmanEncoder) bitLengthRaw(b []byte) int { + var total int + for _, f := range b { + total += int(h.codes[f].len) + } + return total +} + +// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused. +func (h *huffmanEncoder) canReuseBits(freq []uint16) int { + var total int + for i, f := range freq { + if f != 0 { + code := h.codes[i] + if code.len == 0 { + return math.MaxInt32 + } + total += int(f) * int(code.len) + } + } + return total +} + // Return the number of literals assigned to each bit size in the Huffman encoding // // This method is only called when list.length >= 3 @@ -160,14 +188,19 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { // of the level j ancestor. var leafCounts [maxBitsLimit][maxBitsLimit]int32 + // Descending to only have 1 bounds check. + l2f := int32(list[2].freq) + l1f := int32(list[1].freq) + l0f := int32(list[0].freq) + int32(list[1].freq) + for level := int32(1); level <= maxBits; level++ { // For every level, the first two items are the first two characters. // We initialize the levels as if we had already figured this out. levels[level] = levelInfo{ level: level, - lastFreq: int32(list[1].freq), - nextCharFreq: int32(list[2].freq), - nextPairFreq: int32(list[0].freq) + int32(list[1].freq), + lastFreq: l1f, + nextCharFreq: l2f, + nextPairFreq: l0f, } leafCounts[level][level] = 2 if level == 1 { @@ -178,8 +211,8 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { // We need a total of 2*n - 2 items at top level and have already generated 2. levels[maxBits].needed = 2*n - 4 - level := maxBits - for { + level := uint32(maxBits) + for level < 16 { l := &levels[level] if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { // We've run out of both leafs and pairs. @@ -211,7 +244,13 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { // more values in the level below l.lastFreq = l.nextPairFreq // Take leaf counts from the lower level, except counts[level] remains the same. - copy(leafCounts[level][:level], leafCounts[level-1][:level]) + if true { + save := leafCounts[level][level] + leafCounts[level] = leafCounts[level-1] + leafCounts[level][level] = save + } else { + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + } levels[l.level-1].needed = 2 } @@ -269,7 +308,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN sortByLiteral(chunk) for _, node := range chunk { - h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} + h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint8(n)} code++ } list = list[0 : len(list)-int(bits)] @@ -281,13 +320,8 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. // maxBits The maximum number of bits to use for any literal. func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { - if h.freqcache == nil { - // Allocate a reusable buffer with the longest possible frequency table. - // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. - // The largest of these is literalCount, so we allocate for that case. - h.freqcache = make([]literalNode, literalCount+1) - } list := h.freqcache[:len(freq)+1] + codes := h.codes[:len(freq)] // Number of non-zero literals count := 0 // Set list to be the set of all non-zero literals and their frequencies @@ -296,11 +330,10 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { list[count] = literalNode{uint16(i), f} count++ } else { - list[count] = literalNode{} - h.codes[i].len = 0 + codes[i].len = 0 } } - list[len(freq)] = literalNode{} + list[count] = literalNode{} list = list[:count] if count <= 2 { @@ -320,44 +353,32 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { h.assignEncodingAndSize(bitCount, list) } +// atLeastOne clamps the result between 1 and 15. func atLeastOne(v float32) float32 { if v < 1 { return 1 } + if v > 15 { + return 15 + } return v } -// histogramSize accumulates a histogram of b in h. -// An estimated size in bits is returned. // Unassigned values are assigned '1' in the histogram. -// len(h) must be >= 256, and h's elements must be all zeroes. -func histogramSize(b []byte, h []uint16, fill bool) (int, int) { +func fillHist(b []uint16) { + for i, v := range b { + if v == 0 { + b[i] = 1 + } + } +} + +func histogram(b []byte, h []uint16, fill bool) { h = h[:256] for _, t := range b { h[t]++ } - invTotal := 1.0 / float32(len(b)) - shannon := float32(0.0) - var extra float32 if fill { - oneBits := atLeastOne(-mFastLog2(invTotal)) - for i, v := range h[:] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - } else { - h[i] = 1 - extra += oneBits - } - } - } else { - for _, v := range h[:] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - } - } + fillHist(h) } - - return int(shannon + 0.99), int(extra + 0.99) } diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 3e4259f..414c0be 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -9,10 +9,10 @@ package flate import ( "bufio" + "compress/flate" "fmt" "io" "math/bits" - "strconv" "sync" ) @@ -29,16 +29,26 @@ const ( debugDecode = false ) +// Value of length - 3 and extra bits. +type lengthExtra struct { + length, extra uint8 +} + +var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}} + +var bitMask32 = [32]uint32{ + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, + 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, + 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, + 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, +} // up to 32 bits + // Initialize the fixedHuffmanDecoder only once upon first use. var fixedOnce sync.Once var fixedHuffmanDecoder huffmanDecoder // A CorruptInputError reports the presence of corrupt input at a given offset. -type CorruptInputError int64 - -func (e CorruptInputError) Error() string { - return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) -} +type CorruptInputError = flate.CorruptInputError // An InternalError reports an error in the flate code itself. type InternalError string @@ -48,26 +58,12 @@ func (e InternalError) Error() string { return "flate: internal error: " + strin // A ReadError reports an error encountered while reading input. // // Deprecated: No longer returned. -type ReadError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Read -} - -func (e *ReadError) Error() string { - return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} +type ReadError = flate.ReadError // A WriteError reports an error encountered while writing output. // // Deprecated: No longer returned. -type WriteError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Write -} - -func (e *WriteError) Error() string { - return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} +type WriteError = flate.WriteError // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to // to switch to a new underlying Reader. This permits reusing a ReadCloser @@ -339,11 +335,17 @@ func (f *decompressor) nextBlock() { switch typ { case 0: f.dataBlock() + if debugDecode { + fmt.Println("stored block") + } case 1: // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("predefinied huffman block") + } case 2: // compressed, dynamic Huffman tables if f.err = f.readHuffman(); f.err != nil { @@ -352,6 +354,9 @@ func (f *decompressor) nextBlock() { f.hl = &f.h1 f.hd = &f.h2 f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("dynamic huffman block") + } default: // 3 is reserved. if debugDecode { @@ -522,8 +527,8 @@ func (f *decompressor) readHuffman() error { return err } } - rep += int(f.b & uint32(1<>= nb + rep += int(f.b & uint32(1<<(nb®SizeMaskUint32)-1)) + f.b >>= nb & regSizeMaskUint32 f.nb -= nb if i+rep > n { if debugDecode { @@ -561,219 +566,6 @@ func (f *decompressor) readHuffman() error { return nil } -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBlockGeneric() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.r.ReadByte() - if err != nil { - f.b = b - f.nb = nb - f.err = noEOF(err) - return - } - f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - f.b = b >> (n & 31) - f.nb = nb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var n uint // number of bits extra - var length int - var err error - switch { - case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlockGeneric - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - } - length += int(f.b & uint32(1<>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - if debugDecode { - fmt.Println("huffsym:", err) - } - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits f.nb>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { - if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work - f.stepState = stateDict - return - } - goto readLiteral - } -} - // Copy a single uncompressed data block from input to output. func (f *decompressor) dataBlock() { // Uncompressed. @@ -869,7 +661,7 @@ func (f *decompressor) moreBits() error { return noEOF(err) } f.roffset++ - f.b |= uint32(c) << f.nb + f.b |= uint32(c) << (f.nb & regSizeMaskUint32) f.nb += 8 return nil } @@ -894,7 +686,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { return 0, noEOF(err) } f.roffset++ - b |= uint32(c) << (nb & 31) + b |= uint32(c) << (nb & regSizeMaskUint32) nb += 8 } chunk := h.chunks[b&(huffmanNumChunks-1)] @@ -913,7 +705,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { f.err = CorruptInputError(f.roffset) return 0, f.err } - f.b = b >> (n & 31) + f.b = b >> (n & regSizeMaskUint32) f.nb = nb - n return int(chunk >> huffmanValueShift), nil } diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go index 397dc1b..61342b6 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -20,16 +20,11 @@ func (f *decompressor) huffmanBytesBuffer() { stateDict ) fr := f.r.(*bytes.Buffer) - moreBits := func() error { - c, err := fr.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil - } + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -49,128 +44,150 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & 31) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } } } - var n uint // number of bits extra var length int - var err error switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesBuffer f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data case v < 265: length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 case v < maxNumLit: - length = 258 - n = 0 - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = moreBits(); err != nil { + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") } - length += int(f.b & uint32(1<>= n - f.nb -= n + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return } - var dist int + var dist uint32 if f.hd == nil { - for f.nb < 5 { - if err = moreBits(); err != nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { - if dist, err = f.huffSym(f.hd); err != nil { - if debugDecode { - fmt.Println("huffsym:", err) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break } - f.err = err - return } } @@ -180,21 +197,28 @@ readLiteral: case dist < maxNumDist: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = moreBits(); err != nil { + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -203,35 +227,38 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return } - f.copyLen, f.copyDist = length, dist + f.copyLen, f.copyDist = length, int(dist) goto copyHistory } copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } // Decode a single Huffman block from f. @@ -244,16 +271,11 @@ func (f *decompressor) huffmanBytesReader() { stateDict ) fr := f.r.(*bytes.Reader) - moreBits := func() error { - c, err := fr.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil - } + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -273,128 +295,150 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & 31) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } } } - var n uint // number of bits extra var length int - var err error switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesReader f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data case v < 265: length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 case v < maxNumLit: - length = 258 - n = 0 - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = moreBits(); err != nil { + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") } - length += int(f.b & uint32(1<>= n - f.nb -= n + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return } - var dist int + var dist uint32 if f.hd == nil { - for f.nb < 5 { - if err = moreBits(); err != nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { - if dist, err = f.huffSym(f.hd); err != nil { - if debugDecode { - fmt.Println("huffsym:", err) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break } - f.err = err - return } } @@ -404,21 +448,28 @@ readLiteral: case dist < maxNumDist: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = moreBits(); err != nil { + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -427,35 +478,38 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return } - f.copyLen, f.copyDist = length, dist + f.copyLen, f.copyDist = length, int(dist) goto copyHistory } copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesReader // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } // Decode a single Huffman block from f. @@ -468,16 +522,11 @@ func (f *decompressor) huffmanBufioReader() { stateDict ) fr := f.r.(*bufio.Reader) - moreBits := func() error { - c, err := fr.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil - } + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -497,128 +546,150 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & 31) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } } } - var n uint // number of bits extra var length int - var err error switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBufioReader f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data case v < 265: length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 case v < maxNumLit: - length = 258 - n = 0 - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = moreBits(); err != nil { + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - length += int(f.b & uint32(1<>= n - f.nb -= n + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return } - var dist int + var dist uint32 if f.hd == nil { - for f.nb < 5 { - if err = moreBits(); err != nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { - if dist, err = f.huffSym(f.hd); err != nil { - if debugDecode { - fmt.Println("huffsym:", err) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break } - f.err = err - return } } @@ -628,21 +699,28 @@ readLiteral: case dist < maxNumDist: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = moreBits(); err != nil { + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -651,35 +729,38 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return } - f.copyLen, f.copyDist = length, dist + f.copyLen, f.copyDist = length, int(dist) goto copyHistory } copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBufioReader // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } // Decode a single Huffman block from f. @@ -692,16 +773,11 @@ func (f *decompressor) huffmanStringsReader() { stateDict ) fr := f.r.(*strings.Reader) - moreBits := func() error { - c, err := fr.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil - } + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -721,128 +797,401 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & 31) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } } } - var n uint // number of bits extra var length int - var err error switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanStringsReader f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data case v < 265: length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 case v < maxNumLit: - length = 258 - n = 0 + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n default: if debugDecode { fmt.Println(v, ">= maxNumLit") } f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb return } - if n > 0 { - for f.nb < n { - if err = moreBits(); err != nil { + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanGenericReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanGenericReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") } - length += int(f.b & uint32(1<>= n - f.nb -= n + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return } - var dist int + var dist uint32 if f.hd == nil { - for f.nb < 5 { - if err = moreBits(); err != nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } f.err = err return } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { - if dist, err = f.huffSym(f.hd); err != nil { - if debugDecode { - fmt.Println("huffsym:", err) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break } - f.err = err - return } } @@ -852,21 +1201,28 @@ readLiteral: case dist < maxNumDist: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = moreBits(); err != nil { + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -875,35 +1231,38 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return } - f.copyLen, f.copyDist = length, dist + f.copyLen, f.copyDist = length, int(dist) goto copyHistory } copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanStringsReader // We need to continue this work + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanGenericReader // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } func (f *decompressor) huffmanBlockDecoder() func() { @@ -916,7 +1275,9 @@ func (f *decompressor) huffmanBlockDecoder() func() { return f.huffmanBufioReader case *strings.Reader: return f.huffmanStringsReader + case Reader: + return f.huffmanGenericReader default: - return f.huffmanBlockGeneric + return f.huffmanGenericReader } } diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go index 1e5eea3..0f14f8d 100644 --- a/vendor/github.com/klauspost/compress/flate/level1.go +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -1,6 +1,10 @@ package flate -import "fmt" +import ( + "encoding/binary" + "fmt" + "math/bits" +) // fastGen maintains the table for matches, // and the previous byte block for level 2. @@ -116,7 +120,32 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { // Extend the 4-byte match as long as possible. t := candidate.offset - e.cur - l := e.matchlenLong(s+4, t+4, src) + 4 + var l = int32(4) + if false { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else { + // inlined: + a := src[s+4:] + b := src[t+4:] + for len(a) >= 8 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + l += int32(bits.TrailingZeros64(diff) >> 3) + break + } + l += 8 + a = a[8:] + b = b[8:] + } + if len(a) < 8 { + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + break + } + l++ + } + } + } // Extend backwards for t > 0 && s > nextEmit && src[t-1] == src[s-1] { @@ -125,11 +154,43 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { l++ } if nextEmit < s { - emitLiteral(dst, src[nextEmit:s]) + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } } // Save the match found - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + if false { + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + } else { + // Inlined... + xoffset := uint32(s - t - baseMatchOffset) + xlength := l + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } + } + xlength -= xl + xl -= baseMatchLength + dst.extraHist[lengthCodes1[uint8(xl)]]++ + dst.offHist[oc]++ + dst.tokens[dst.n] = token(matchType | uint32(xl)<= s { diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go index 5b986a1..8603fbd 100644 --- a/vendor/github.com/klauspost/compress/flate/level2.go +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -134,7 +134,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { l++ } if nextEmit < s { - emitLiteral(dst, src[nextEmit:s]) + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } } dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) @@ -155,7 +163,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { // Store every second hash in-between, but offset by 1. for i := s - l + 2; i < s-5; i += 7 { - x := load6432(src, int32(i)) + x := load6432(src, i) nextHash := hash4u(uint32(x), bTableBits) e.table[nextHash] = tableEntry{offset: e.cur + i} // Skip one diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go index c22b424..039639f 100644 --- a/vendor/github.com/klauspost/compress/flate/level3.go +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -5,7 +5,7 @@ import "fmt" // fastEncL3 type fastEncL3 struct { fastGen - table [tableSize]tableEntryPrev + table [1 << 16]tableEntryPrev } // Encode uses a similar algorithm to level 2, will check up to two candidates. @@ -13,6 +13,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { const ( inputMargin = 8 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin + tableBits = 16 + tableSize = 1 << tableBits ) if debugDeflate && e.cur < 0 { @@ -73,7 +75,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { nextS := s var candidate tableEntry for { - nextHash := hash(cv) + nextHash := hash4u(cv, tableBits) s = nextS nextS = s + 1 + (s-nextEmit)>>skipLog if nextS > sLimit { @@ -141,7 +143,15 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { l++ } if nextEmit < s { - emitLiteral(dst, src[nextEmit:s]) + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } } dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) @@ -156,7 +166,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(t+4) < len(src) && t > 0 { cv := load3232(src, t) - nextHash := hash(cv) + nextHash := hash4u(cv, tableBits) e.table[nextHash] = tableEntryPrev{ Prev: e.table[nextHash].Cur, Cur: tableEntry{offset: e.cur + t}, @@ -165,30 +175,31 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { goto emitRemainder } - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-3 to s. - x := load6432(src, s-3) - prevHash := hash(uint32(x)) - e.table[prevHash] = tableEntryPrev{ - Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 3}, + // Store every 5th hash in-between. + for i := s - l + 2; i < s-5; i += 5 { + nextHash := hash4u(load3232(src, i), tableBits) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + i}} } - x >>= 8 - prevHash = hash(uint32(x)) + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. + x := load6432(src, s-2) + prevHash := hash4u(uint32(x), tableBits) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, Cur: tableEntry{offset: e.cur + s - 2}, } x >>= 8 - prevHash = hash(uint32(x)) + prevHash = hash4u(uint32(x), tableBits) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, Cur: tableEntry{offset: e.cur + s - 1}, } x >>= 8 - currHash := hash(uint32(x)) + currHash := hash4u(uint32(x), tableBits) candidates := e.table[currHash] cv = uint32(x) e.table[currHash] = tableEntryPrev{ @@ -200,15 +211,15 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { candidate = candidates.Cur minOffset := e.cur + s - (maxMatchOffset - 4) - if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) { - // We only check if value mismatches. - // Offset will always be invalid in other cases. + if candidate.offset > minOffset { + if cv == load3232(src, candidate.offset-e.cur) { + // Found a match... + continue + } candidate = candidates.Prev if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { - offset := s - (candidate.offset - e.cur) - if offset <= maxMatchOffset { - continue - } + // Match at prev... + continue } } cv = uint32(x >> 8) diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go index e62f0c0..1cbffa1 100644 --- a/vendor/github.com/klauspost/compress/flate/level4.go +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -135,7 +135,15 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { l++ } if nextEmit < s { - emitLiteral(dst, src[nextEmit:s]) + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } } if debugDeflate { if t >= s { diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index d513f1f..4b97576 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -182,12 +182,27 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit // them as literal bytes. - // Extend the 4-byte match as long as possible. if l == 0 { + // Extend the 4-byte match as long as possible. l = e.matchlenLong(s+4, t+4, src) + 4 } else if l == maxMatchLength { l += e.matchlenLong(s+l, t+l, src) } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + // Test current + t2 := eLong - e.cur - l + off := s - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + // Extend backwards for t > 0 && s > nextEmit && src[t-1] == src[s-1] { s-- @@ -195,7 +210,15 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { l++ } if nextEmit < s { - emitLiteral(dst, src[nextEmit:s]) + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } } if debugDeflate { if t >= s { diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go index a52c80e..62888ed 100644 --- a/vendor/github.com/klauspost/compress/flate/level6.go +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -211,6 +211,31 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { l += e.matchlenLong(s+l, t+l, src) } + // Try to locate a better match by checking the end-of-match... + if sAt := s + l; sAt < sLimit { + eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] + // Test current + t2 := eLong.Cur.offset - e.cur - l + off := s - t2 + if off < maxMatchOffset { + if off > 0 && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + // Test next: + t2 = eLong.Prev.offset - e.cur - l + off := s - t2 + if off > 0 && off < maxMatchOffset && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + } + // Extend backwards for t > 0 && s > nextEmit && src[t-1] == src[s-1] { s-- @@ -218,7 +243,15 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { l++ } if nextEmit < s { - emitLiteral(dst, src[nextEmit:s]) + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } } if false { if t >= s { diff --git a/vendor/github.com/klauspost/compress/flate/regmask_amd64.go b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go new file mode 100644 index 0000000..6ed2806 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go @@ -0,0 +1,37 @@ +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. + + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 7 + reg8SizeMask16 = 15 + reg8SizeMask32 = 31 + reg8SizeMask64 = 63 + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = reg8SizeMask8 + reg16SizeMask16 = reg8SizeMask16 + reg16SizeMask32 = reg8SizeMask32 + reg16SizeMask64 = reg8SizeMask64 + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = reg8SizeMask8 + reg32SizeMask16 = reg8SizeMask16 + reg32SizeMask32 = reg8SizeMask32 + reg32SizeMask64 = reg8SizeMask64 + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = reg8SizeMask8 + reg64SizeMask16 = reg8SizeMask16 + reg64SizeMask32 = reg8SizeMask32 + reg64SizeMask64 = reg8SizeMask64 + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = reg8SizeMask8 + regSizeMaskUint16 = reg8SizeMask16 + regSizeMaskUint32 = reg8SizeMask32 + regSizeMaskUint64 = reg8SizeMask64 +) diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go new file mode 100644 index 0000000..1b7a2cb --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/regmask_other.go @@ -0,0 +1,40 @@ +//go:build !amd64 +// +build !amd64 + +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. + + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 0xff + reg8SizeMask16 = 0xff + reg8SizeMask32 = 0xff + reg8SizeMask64 = 0xff + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = 0xffff + reg16SizeMask16 = 0xffff + reg16SizeMask32 = 0xffff + reg16SizeMask64 = 0xffff + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = 0xffffffff + reg32SizeMask16 = 0xffffffff + reg32SizeMask32 = 0xffffffff + reg32SizeMask64 = 0xffffffff + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = 0xffffffffffffffff + reg64SizeMask16 = 0xffffffffffffffff + reg64SizeMask32 = 0xffffffffffffffff + reg64SizeMask64 = 0xffffffffffffffff + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = ^uint(0) + regSizeMaskUint16 = ^uint(0) + regSizeMaskUint32 = ^uint(0) + regSizeMaskUint64 = ^uint(0) +) diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go index 53e8991..544162a 100644 --- a/vendor/github.com/klauspost/compress/flate/stateless.go +++ b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -249,7 +249,15 @@ func statelessEnc(dst *tokens, src []byte, startAt int16) { l++ } if nextEmit < s { - emitLiteral(dst, src[nextEmit:s]) + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } } // Save the match found diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go index f9abf60..d818790 100644 --- a/vendor/github.com/klauspost/compress/flate/token.go +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -13,14 +13,16 @@ import ( ) const ( - // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused - // 8 bits: xlength = length - MIN_MATCH_LENGTH - // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal - lengthShift = 22 - offsetMask = 1<maxnumlit offHist [32]uint16 // offset codes litHist [256]uint16 // codes 0->255 - n uint16 // Must be able to contain maxStoreBlockSize + nFilled int + n uint16 // Must be able to contain maxStoreBlockSize tokens [maxStoreBlockSize + 1]token } @@ -139,7 +141,7 @@ func (t *tokens) Reset() { return } t.n = 0 - t.nLits = 0 + t.nFilled = 0 for i := range t.litHist[:] { t.litHist[i] = 0 } @@ -158,12 +160,12 @@ func (t *tokens) Fill() { for i, v := range t.litHist[:] { if v == 0 { t.litHist[i] = 1 - t.nLits++ + t.nFilled++ } } for i, v := range t.extraHist[:literalCount-256] { if v == 0 { - t.nLits++ + t.nFilled++ t.extraHist[i] = 1 } } @@ -187,26 +189,23 @@ func (t *tokens) indexTokens(in []token) { t.AddLiteral(tok.literal()) continue } - t.AddMatch(uint32(tok.length()), tok.offset()) + t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) } } // emitLiteral writes a literal chunk and returns the number of bytes written. func emitLiteral(dst *tokens, lit []byte) { - ol := int(dst.n) - for i, v := range lit { - dst.tokens[(i+ol)&maxStoreBlockSize] = token(v) + for _, v := range lit { + dst.tokens[dst.n] = token(v) dst.litHist[v]++ + dst.n++ } - dst.n += uint16(len(lit)) - dst.nLits += len(lit) } func (t *tokens) AddLiteral(lit byte) { t.tokens[t.n] = token(lit) t.litHist[lit]++ t.n++ - t.nLits++ } // from https://stackoverflow.com/a/28730362 @@ -227,12 +226,13 @@ func (t *tokens) EstimatedBits() int { shannon := float32(0) bits := int(0) nMatches := 0 - if t.nLits > 0 { - invTotal := 1.0 / float32(t.nLits) + total := int(t.n) + t.nFilled + if total > 0 { + invTotal := 1.0 / float32(total) for _, v := range t.litHist[:] { if v > 0 { n := float32(v) - shannon += -mFastLog2(n*invTotal) * n + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n } } // Just add 15 for EOB @@ -240,7 +240,7 @@ func (t *tokens) EstimatedBits() int { for i, v := range t.extraHist[1 : literalCount-256] { if v > 0 { n := float32(v) - shannon += -mFastLog2(n*invTotal) * n + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n bits += int(lengthExtraBits[i&31]) * int(v) nMatches += int(v) } @@ -251,7 +251,7 @@ func (t *tokens) EstimatedBits() int { for i, v := range t.offHist[:offsetCodeCount] { if v > 0 { n := float32(v) - shannon += -mFastLog2(n*invTotal) * n + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n bits += int(offsetExtraBits[i&31]) * int(v) } } @@ -270,11 +270,12 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { panic(fmt.Errorf("invalid offset: %v", xoffset)) } } - t.nLits++ - lengthCode := lengthCodes1[uint8(xlength)] & 31 + oCode := offsetCode(xoffset) + xoffset |= oCode << 16 + + t.extraHist[lengthCodes1[uint8(xlength)]]++ + t.offHist[oCode&31]++ t.tokens[t.n] = token(matchType | xlength< 0 { xl := xlength if xl > 258 { // We need to have at least baseMatchLength left over for next loop. - xl = 258 - baseMatchLength + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } } xlength -= xl - xl -= 3 - t.nLits++ - lengthCode := lengthCodes1[uint8(xl)] & 31 + xl -= baseMatchLength + t.extraHist[lengthCodes1[uint8(xl)]]++ + t.offHist[oc&31]++ t.tokens[t.n] = token(matchType | uint32(xl)<> lengthShift) } -// The code is never more than 8 bits, but is returned as uint32 for convenience. -func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) } +// Convert length to code. +func lengthCode(len uint8) uint8 { return lengthCodes[len] } // Returns the offset code corresponding to a specific offset func offsetCode(off uint32) uint32 { diff --git a/vendor/github.com/klauspost/compress/fse/README.md b/vendor/github.com/klauspost/compress/fse/README.md index 27d8ed5..ea7324d 100644 --- a/vendor/github.com/klauspost/compress/fse/README.md +++ b/vendor/github.com/klauspost/compress/fse/README.md @@ -1,79 +1,79 @@ -# Finite State Entropy - -This package provides Finite State Entropy encoding and decoding. - -Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS)) -encoding provides a fast near-optimal symbol encoding/decoding -for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd). - -This can be used for compressing input with a lot of similar input values to the smallest number of bytes. -This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, -but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. - -* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse) - -## News - - * Feb 2018: First implementation released. Consider this beta software for now. - -# Usage - -This package provides a low level interface that allows to compress single independent blocks. - -Each block is separate, and there is no built in integrity checks. -This means that the caller should keep track of block sizes and also do checksums if needed. - -Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function. -You must provide input and will receive the output and maybe an error. - -These error values can be returned: - -| Error | Description | -|---------------------|-----------------------------------------------------------------------------| -| `` | Everything ok, output is returned | -| `ErrIncompressible` | Returned when input is judged to be too hard to compress | -| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | -| `(error)` | An internal error occurred. | - -As can be seen above there are errors that will be returned even under normal operation so it is important to handle these. - -To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object -that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same -object can be used for both. - -Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this -you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. - -Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function. -You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back -your input was likely corrupted. - -It is important to note that a successful decoding does *not* mean your output matches your original input. -There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. - -For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples). - -# Performance - -A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors. -All compression functions are currently only running on the calling goroutine so only one core will be used per block. - -The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input -is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be -beneficial to transpose all your input values down by 64. - -With moderate block sizes around 64k speed are typically 200MB/s per core for compression and -around 300MB/s decompression speed. - -The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s. - -# Plans - -At one point, more internals will be exposed to facilitate more "expert" usage of the components. - -A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261). - -# Contributing - -Contributions are always welcome. Be aware that adding public functions will require good justification and breaking +# Finite State Entropy + +This package provides Finite State Entropy encoding and decoding. + +Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS)) +encoding provides a fast near-optimal symbol encoding/decoding +for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd). + +This can be used for compressing input with a lot of similar input values to the smallest number of bytes. +This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, +but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. + +* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse) + +## News + + * Feb 2018: First implementation released. Consider this beta software for now. + +# Usage + +This package provides a low level interface that allows to compress single independent blocks. + +Each block is separate, and there is no built in integrity checks. +This means that the caller should keep track of block sizes and also do checksums if needed. + +Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function. +You must provide input and will receive the output and maybe an error. + +These error values can be returned: + +| Error | Description | +|---------------------|-----------------------------------------------------------------------------| +| `` | Everything ok, output is returned | +| `ErrIncompressible` | Returned when input is judged to be too hard to compress | +| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | +| `(error)` | An internal error occurred. | + +As can be seen above there are errors that will be returned even under normal operation so it is important to handle these. + +To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object +that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same +object can be used for both. + +Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this +you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. + +Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function. +You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back +your input was likely corrupted. + +It is important to note that a successful decoding does *not* mean your output matches your original input. +There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. + +For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples). + +# Performance + +A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors. +All compression functions are currently only running on the calling goroutine so only one core will be used per block. + +The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input +is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be +beneficial to transpose all your input values down by 64. + +With moderate block sizes around 64k speed are typically 200MB/s per core for compression and +around 300MB/s decompression speed. + +The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s. + +# Plans + +At one point, more internals will be exposed to facilitate more "expert" usage of the components. + +A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261). + +# Contributing + +Contributions are always welcome. Be aware that adding public functions will require good justification and breaking changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index b69237c..6f34191 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -92,7 +92,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTra im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + first.deltaFindState c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. @@ -301,7 +300,7 @@ func (s *Scratch) writeCount() error { out[outP+1] = byte(bitStream >> 8) outP += (bitCount + 7) / 8 - if uint16(charnum) > s.symbolLen { + if charnum > s.symbolLen { return errors.New("internal error: charnum > s.symbolLen") } s.Out = out[:outP] @@ -331,7 +330,7 @@ type cTable struct { func (s *Scratch) allocCtable() { tableSize := 1 << s.actualTableLog // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < int(tableSize) { + if cap(s.ct.tableSymbol) < tableSize { s.ct.tableSymbol = make([]byte, tableSize) } s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] @@ -565,8 +564,8 @@ func (s *Scratch) normalizeCount2() error { distributed uint32 total = uint32(s.br.remain()) tableLog = s.actualTableLog - lowThreshold = uint32(total >> tableLog) - lowOne = uint32((total * 3) >> (tableLog + 1)) + lowThreshold = total >> tableLog + lowOne = (total * 3) >> (tableLog + 1) ) for i, cnt := range s.count[:s.symbolLen] { if cnt == 0 { @@ -591,7 +590,7 @@ func (s *Scratch) normalizeCount2() error { if (total / toDistribute) > lowOne { // risk of rounding to zero - lowOne = uint32((total * 3) / (toDistribute * 2)) + lowOne = (total * 3) / (toDistribute * 2) for i, cnt := range s.count[:s.symbolLen] { if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { s.norm[i] = 1 diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index 413ec3b..926f5f1 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -172,7 +172,7 @@ type decSymbol struct { // allocDtable will allocate decoding tables if they are not big enough. func (s *Scratch) allocDtable() { tableSize := 1 << s.actualTableLog - if cap(s.decTable) < int(tableSize) { + if cap(s.decTable) < tableSize { s.decTable = make([]decSymbol, tableSize) } s.decTable = s.decTable[:tableSize] @@ -340,7 +340,7 @@ type decoder struct { func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) { d.dt = dt d.br = in - d.state = uint16(in.getBits(tableLog)) + d.state = in.getBits(tableLog) } // next returns the next symbol and sets the next state. diff --git a/vendor/github.com/klauspost/compress/gen.sh b/vendor/github.com/klauspost/compress/gen.sh new file mode 100644 index 0000000..aff9422 --- /dev/null +++ b/vendor/github.com/klauspost/compress/gen.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +cd s2/cmd/_s2sx/ || exit 1 +go generate . diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md index ec4f909..8b6e5c6 100644 --- a/vendor/github.com/klauspost/compress/huff0/README.md +++ b/vendor/github.com/klauspost/compress/huff0/README.md @@ -1,87 +1,89 @@ -# Huff0 entropy compression - -This package provides Huff0 encoding and decoding as used in zstd. - -[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders), -a Huffman codec designed for modern CPU, featuring OoO (Out of Order) operations on multiple ALU -(Arithmetic Logic Unit), achieving extremely fast compression and decompression speeds. - -This can be used for compressing input with a lot of similar input values to the smallest number of bytes. -This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, -but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. - -* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0) - -## News - - * Mar 2018: First implementation released. Consider this beta software for now. - -# Usage - -This package provides a low level interface that allows to compress single independent blocks. - -Each block is separate, and there is no built in integrity checks. -This means that the caller should keep track of block sizes and also do checksums if needed. - -Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and -[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions. -You must provide input and will receive the output and maybe an error. - -These error values can be returned: - -| Error | Description | -|---------------------|-----------------------------------------------------------------------------| -| `` | Everything ok, output is returned | -| `ErrIncompressible` | Returned when input is judged to be too hard to compress | -| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | -| `ErrTooBig` | Returned if the input block exceeds the maximum allowed size (128 Kib) | -| `(error)` | An internal error occurred. | - - -As can be seen above some of there are errors that will be returned even under normal operation so it is important to handle these. - -To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object -that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same -object can be used for both. - -Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this -you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. - -The `Scratch` object will retain state that allows to re-use previous tables for encoding and decoding. - -## Tables and re-use - -Huff0 allows for reusing tables from the previous block to save space if that is expected to give better/faster results. - -The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy) -that controls this behaviour. See the documentation for details. This can be altered between each block. - -Do however note that this information is *not* stored in the output block and it is up to the users of the package to -record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called, -based on the boolean reported back from the CompressXX call. - -If you want to store the table separate from the data, you can access them as `OutData` and `OutTable` on the -[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object. - -## Decompressing - -The first part of decoding is to initialize the decoding table through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable). -This will initialize the decoding tables. -You can supply the complete block to `ReadTable` and it will return the data part of the block -which can be given to the decompressor. - -Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) -or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function. - -For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size. - -You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back -your input was likely corrupted. - -It is important to note that a successful decoding does *not* mean your output matches your original input. -There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. - -# Contributing - -Contributions are always welcome. Be aware that adding public functions will require good justification and breaking -changes will likely not be accepted. If in doubt open an issue before writing the PR. +# Huff0 entropy compression + +This package provides Huff0 encoding and decoding as used in zstd. + +[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders), +a Huffman codec designed for modern CPU, featuring OoO (Out of Order) operations on multiple ALU +(Arithmetic Logic Unit), achieving extremely fast compression and decompression speeds. + +This can be used for compressing input with a lot of similar input values to the smallest number of bytes. +This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, +but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. + +* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0) + +## News + +This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package. + +This ensures that most functionality is well tested. + +# Usage + +This package provides a low level interface that allows to compress single independent blocks. + +Each block is separate, and there is no built in integrity checks. +This means that the caller should keep track of block sizes and also do checksums if needed. + +Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and +[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions. +You must provide input and will receive the output and maybe an error. + +These error values can be returned: + +| Error | Description | +|---------------------|-----------------------------------------------------------------------------| +| `` | Everything ok, output is returned | +| `ErrIncompressible` | Returned when input is judged to be too hard to compress | +| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | +| `ErrTooBig` | Returned if the input block exceeds the maximum allowed size (128 Kib) | +| `(error)` | An internal error occurred. | + + +As can be seen above some of there are errors that will be returned even under normal operation so it is important to handle these. + +To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object +that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same +object can be used for both. + +Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this +you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. + +The `Scratch` object will retain state that allows to re-use previous tables for encoding and decoding. + +## Tables and re-use + +Huff0 allows for reusing tables from the previous block to save space if that is expected to give better/faster results. + +The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy) +that controls this behaviour. See the documentation for details. This can be altered between each block. + +Do however note that this information is *not* stored in the output block and it is up to the users of the package to +record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called, +based on the boolean reported back from the CompressXX call. + +If you want to store the table separate from the data, you can access them as `OutData` and `OutTable` on the +[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object. + +## Decompressing + +The first part of decoding is to initialize the decoding table through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable). +This will initialize the decoding tables. +You can supply the complete block to `ReadTable` and it will return the data part of the block +which can be given to the decompressor. + +Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) +or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function. + +For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size. + +You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back +your input was likely corrupted. + +It is important to note that a successful decoding does *not* mean your output matches your original input. +There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. + +# Contributing + +Contributions are always welcome. Be aware that adding public functions will require good justification and breaking +changes will likely not be accepted. If in doubt open an issue before writing the PR. diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go index a4979e8..451160e 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitreader.go +++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go @@ -8,115 +8,10 @@ package huff0 import ( "encoding/binary" "errors" + "fmt" "io" ) -// bitReader reads a bitstream in reverse. -// The last set bit indicates the start of the stream and is used -// for aligning the input. -type bitReader struct { - in []byte - off uint // next byte to read is at in[off - 1] - value uint64 - bitsRead uint8 -} - -// init initializes and resets the bit reader. -func (b *bitReader) init(in []byte) error { - if len(in) < 1 { - return errors.New("corrupt stream: too short") - } - b.in = in - b.off = uint(len(in)) - // The highest bit of the last byte indicates where to start - v := in[len(in)-1] - if v == 0 { - return errors.New("corrupt stream, did not find end of stream") - } - b.bitsRead = 64 - b.value = 0 - if len(in) >= 8 { - b.fillFastStart() - } else { - b.fill() - b.fill() - } - b.bitsRead += 8 - uint8(highBit32(uint32(v))) - return nil -} - -// peekBitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReader) peekBitsFast(n uint8) uint16 { - const regMask = 64 - 1 - v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - return v -} - -// fillFast() will make sure at least 32 bits are available. -// There must be at least 4 bytes available. -func (b *bitReader) fillFast() { - if b.bitsRead < 32 { - return - } - - // 2 bounds checks. - v := b.in[b.off-4 : b.off] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) - b.bitsRead -= 32 - b.off -= 4 -} - -func (b *bitReader) advance(n uint8) { - b.bitsRead += n -} - -// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. -func (b *bitReader) fillFastStart() { - // Do single re-slice to avoid bounds checks. - b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) - b.bitsRead = 0 - b.off -= 8 -} - -// fill() will make sure at least 32 bits are available. -func (b *bitReader) fill() { - if b.bitsRead < 32 { - return - } - if b.off > 4 { - v := b.in[b.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) - b.bitsRead -= 32 - b.off -= 4 - return - } - for b.off > 0 { - b.value = (b.value << 8) | uint64(b.in[b.off-1]) - b.bitsRead -= 8 - b.off-- - } -} - -// finished returns true if all bits have been read from the bit stream. -func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 -} - -// close the bitstream and returns an error if out-of-buffer reads occurred. -func (b *bitReader) close() error { - // Release reference. - b.in = nil - if b.bitsRead > 64 { - return io.ErrUnexpectedEOF - } - return nil -} - // bitReader reads a bitstream in reverse. // The last set bit indicates the start of the stream and is used // for aligning the input. @@ -213,10 +108,17 @@ func (b *bitReaderBytes) finished() bool { return b.off == 0 && b.bitsRead >= 64 } +func (b *bitReaderBytes) remaining() uint { + return b.off*8 + uint(64-b.bitsRead) +} + // close the bitstream and returns an error if out-of-buffer reads occurred. func (b *bitReaderBytes) close() error { // Release reference. b.in = nil + if b.remaining() > 0 { + return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) + } if b.bitsRead > 64 { return io.ErrUnexpectedEOF } @@ -263,6 +165,11 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 { return uint16(b.value >> ((64 - n) & 63)) } +// peekTopBits(n) is equvialent to peekBitFast(64 - n) +func (b *bitReaderShifted) peekTopBits(n uint8) uint16 { + return uint16(b.value >> n) +} + func (b *bitReaderShifted) advance(n uint8) { b.bitsRead += n b.value <<= n & 63 @@ -318,10 +225,17 @@ func (b *bitReaderShifted) finished() bool { return b.off == 0 && b.bitsRead >= 64 } +func (b *bitReaderShifted) remaining() uint { + return b.off*8 + uint(64-b.bitsRead) +} + // close the bitstream and returns an error if out-of-buffer reads occurred. func (b *bitReaderShifted) close() error { // Release reference. b.in = nil + if b.remaining() > 0 { + return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) + } if b.bitsRead > 64 { return io.ErrUnexpectedEOF } diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index bda4021..6bce4e8 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -43,6 +43,11 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { func (b *bitWriter) encSymbol(ct cTable, symbol byte) { enc := ct[symbol] b.bitContainer |= uint64(enc.val) << (b.nBits & 63) + if false { + if enc.nBits == 0 { + panic("nbits 0") + } + } b.nBits += enc.nBits } @@ -54,6 +59,14 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) { sh := b.nBits & 63 combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63)) b.bitContainer |= combined << sh + if false { + if encA.nBits == 0 { + panic("nbitsA 0") + } + if encB.nBits == 0 { + panic("nbitsB 0") + } + } b.nBits += encA.nBits + encB.nBits } diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 0843cb0..bc95ac6 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -2,6 +2,7 @@ package huff0 import ( "fmt" + "math" "runtime" "sync" ) @@ -77,8 +78,11 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) // Each symbol present maximum once or too well distributed. return nil, false, ErrIncompressible } - - if s.Reuse == ReusePolicyPrefer && canReuse { + if s.Reuse == ReusePolicyMust && !canReuse { + // We must reuse, but we can't. + return nil, false, ErrIncompressible + } + if (s.Reuse == ReusePolicyPrefer || s.Reuse == ReusePolicyMust) && canReuse { keepTable := s.cTable keepTL := s.actualTableLog s.cTable = s.prevTable @@ -90,6 +94,9 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) s.OutData = s.Out return s.Out, true, nil } + if s.Reuse == ReusePolicyMust { + return nil, false, ErrIncompressible + } // Do not attempt to re-use later. s.prevTable = s.prevTable[:0] } @@ -155,6 +162,70 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) return s.Out, false, nil } +// EstimateSizes will estimate the data sizes +func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err error) { + s, err = s.prepare(in) + if err != nil { + return 0, 0, 0, err + } + + // Create histogram, if none was provided. + tableSz, dataSz, reuseSz = -1, -1, -1 + maxCount := s.maxCount + var canReuse = false + if maxCount == 0 { + maxCount, canReuse = s.countSimple(in) + } else { + canReuse = s.canUseTable(s.prevTable) + } + + // We want the output size to be less than this: + wantSize := len(in) + if s.WantLogLess > 0 { + wantSize -= wantSize >> s.WantLogLess + } + + // Reset for next run. + s.clearCount = true + s.maxCount = 0 + if maxCount >= len(in) { + if maxCount > len(in) { + return 0, 0, 0, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in)) + } + if len(in) == 1 { + return 0, 0, 0, ErrIncompressible + } + // One symbol, use RLE + return 0, 0, 0, ErrUseRLE + } + if maxCount == 1 || maxCount < (len(in)>>7) { + // Each symbol present maximum once or too well distributed. + return 0, 0, 0, ErrIncompressible + } + + // Calculate new table. + err = s.buildCTable() + if err != nil { + return 0, 0, 0, err + } + + if false && !s.canUseTable(s.cTable) { + panic("invalid table generated") + } + + tableSz, err = s.cTable.estTableSize(s) + if err != nil { + return 0, 0, 0, err + } + if canReuse { + reuseSz = s.prevTable.estimateSize(s.count[:s.symbolLen]) + } + dataSz = s.cTable.estimateSize(s.count[:s.symbolLen]) + + // Restore + return tableSz, dataSz, reuseSz, nil +} + func (s *Scratch) compress1X(src []byte) ([]byte, error) { return s.compress1xDo(s.Out, src) } @@ -219,6 +290,10 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) { if err != nil { return nil, err } + if len(s.Out)-idx > math.MaxUint16 { + // We cannot store the size in the jump table + return nil, ErrIncompressible + } // Write compressed length as little endian before block. if i < 3 { // Last length is not written. @@ -262,6 +337,10 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { return nil, errs[i] } o := s.tmpOut[i] + if len(o) > math.MaxUint16 { + // We cannot store the size in the jump table + return nil, ErrIncompressible + } // Write compressed length as little endian before block. if i < 3 { // Last length is not written. @@ -397,7 +476,7 @@ func (s *Scratch) buildCTable() error { var startNode = int16(s.symbolLen) nonNullRank := s.symbolLen - 1 - nodeNb := int16(startNode) + nodeNb := startNode huffNode := s.nodes[1 : huffNodesLen+1] // This overlays the slice above, but allows "-1" index lookups. @@ -530,7 +609,6 @@ func (s *Scratch) huffSort() { } nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} } - return } func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { @@ -574,7 +652,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { // Get pos of last (smallest) symbol per rank { - currentNbBits := uint8(maxNbBits) + currentNbBits := maxNbBits for pos := int(n); pos >= 0; pos-- { if huffNode[pos].nbBits >= currentNbBits { continue diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index a03b263..04f6529 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "io" + "sync" "github.com/klauspost/compress/fse" ) @@ -20,7 +21,7 @@ type dEntrySingle struct { // double-symbols decoding type dEntryDouble struct { - seq uint16 + seq [4]byte nBits uint8 len uint8 } @@ -32,7 +33,7 @@ const use8BitTables = true // The size of the input may be larger than the table definition. // Any content remaining after the table definition will be returned. // If no Scratch is provided a new one is allocated. -// The returned Scratch can be used for decoding input using this table. +// The returned Scratch can be used for encoding or decoding input using this table. func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { s, err = s.prepare(in) if err != nil { @@ -58,8 +59,8 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { s.symbolLen = uint16(oSize) in = in[iSize:] } else { - if len(in) <= int(iSize) { - return s, nil, errors.New("input too small for table") + if len(in) < int(iSize) { + return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in)) } // FSE compressed weights s.fse.DecompressLimit = 255 @@ -138,15 +139,33 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { if len(s.dt.single) != tSize { s.dt.single = make([]dEntrySingle, tSize) } + cTable := s.prevTable + if cap(cTable) < maxSymbolValue+1 { + cTable = make([]cTableEntry, 0, maxSymbolValue+1) + } + cTable = cTable[:maxSymbolValue+1] + s.prevTable = cTable[:s.symbolLen] + s.prevTableLog = s.actualTableLog + for n, w := range s.huffWeight[:s.symbolLen] { if w == 0 { + cTable[n] = cTableEntry{ + val: 0, + nBits: 0, + } continue } length := (uint32(1) << w) >> 1 d := dEntrySingle{ entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8), } + rank := &rankStats[w] + cTable[n] = cTableEntry{ + val: uint16(*rank >> (w - 1)), + nBits: uint8(d.entry), + } + single := s.dt.single[*rank : *rank+length] for i := range single { single[i] = d @@ -198,6 +217,7 @@ func (s *Scratch) Decoder() *Decoder { return &Decoder{ dt: s.dt, actualTableLog: s.actualTableLog, + bufs: &s.decPool, } } @@ -205,6 +225,15 @@ func (s *Scratch) Decoder() *Decoder { type Decoder struct { dt dTable actualTableLog uint8 + bufs *sync.Pool +} + +func (d *Decoder) buffer() *[4][256]byte { + buf, ok := d.bufs.Get().(*[4][256]byte) + if ok { + return buf + } + return &[4][256]byte{} } // Decompress1X will decompress a 1X encoded stream. @@ -231,7 +260,8 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { dt := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + bufs := d.buffer() + buf := &bufs[0] var off uint8 for br.off >= 8 { @@ -259,6 +289,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { if off == 0 { if len(dst)+256 > maxDecodedSize { br.close() + d.bufs.Put(bufs) return nil, ErrMaxDecodedSizeExceeded } dst = append(dst, buf[:]...) @@ -266,6 +297,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { } if len(dst)+int(off) > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -292,6 +324,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { } } if len(dst) >= maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -301,6 +334,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { bitsLeft -= nBits dst = append(dst, uint8(v.entry>>8)) } + d.bufs.Put(bufs) return dst, br.close() } @@ -323,41 +357,258 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { dt := d.dt.single[:256] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + bufs := d.buffer() + buf := &bufs[0] var off uint8 - shift := (8 - d.actualTableLog) & 7 - - //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) - for br.off >= 4 { - br.fillFast() - v := dt[br.peekByteFast()>>shift] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[br.peekByteFast()>>shift] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[br.peekByteFast()>>shift] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[br.peekByteFast()>>shift] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - br.close() - return nil, ErrMaxDecodedSizeExceeded + switch d.actualTableLog { + case 8: + const shift = 8 - 8 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + d.bufs.Put(bufs) + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + case 7: + const shift = 8 - 7 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + d.bufs.Put(bufs) + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + case 6: + const shift = 8 - 6 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + case 5: + const shift = 8 - 5 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + case 4: + const shift = 8 - 4 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + case 3: + const shift = 8 - 3 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + case 2: + const shift = 8 - 2 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + case 1: + const shift = 8 - 1 + for br.off >= 4 { + br.fillFast() + v := dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[uint8(br.value>>(56+shift))] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) } - dst = append(dst, buf[:]...) } + default: + d.bufs.Put(bufs) + return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog) } if len(dst)+int(off) > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -365,6 +616,8 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { // br < 4, so uint8 is fine bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) + shift := (8 - d.actualTableLog) & 7 + for bitsLeft > 0 { if br.bitsRead >= 64-8 { for br.off > 0 { @@ -375,6 +628,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { } if len(dst) >= maxDecodedSize { br.close() + d.bufs.Put(bufs) return nil, ErrMaxDecodedSizeExceeded } v := dt[br.peekByteFast()>>shift] @@ -383,6 +637,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { bitsLeft -= int8(nBits) dst = append(dst, uint8(v.entry>>8)) } + d.bufs.Put(bufs) return dst, br.close() } @@ -402,33 +657,35 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { dt := d.dt.single[:256] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + bufs := d.buffer() + buf := &bufs[0] var off uint8 - const shift = 0 + const shift = 56 //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) for br.off >= 4 { br.fillFast() - v := dt[br.peekByteFast()>>shift] + v := dt[uint8(br.value>>shift)] br.advance(uint8(v.entry)) buf[off+0] = uint8(v.entry >> 8) - v = dt[br.peekByteFast()>>shift] + v = dt[uint8(br.value>>shift)] br.advance(uint8(v.entry)) buf[off+1] = uint8(v.entry >> 8) - v = dt[br.peekByteFast()>>shift] + v = dt[uint8(br.value>>shift)] br.advance(uint8(v.entry)) buf[off+2] = uint8(v.entry >> 8) - v = dt[br.peekByteFast()>>shift] + v = dt[uint8(br.value>>shift)] br.advance(uint8(v.entry)) buf[off+3] = uint8(v.entry >> 8) off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -437,6 +694,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { } if len(dst)+int(off) > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -453,208 +711,20 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { } } if len(dst) >= maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } - v := dt[br.peekByteFast()>>shift] + v := dt[br.peekByteFast()] nBits := uint8(v.entry) br.advance(nBits) bitsLeft -= int8(nBits) dst = append(dst, uint8(v.entry>>8)) } + d.bufs.Put(bufs) return dst, br.close() } -// Decompress4X will decompress a 4X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// The *capacity* of the dst slice must match the destination size of -// the uncompressed data exactly. -func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - if len(src) < 6+(4*1) { - return nil, errors.New("input too small") - } - if use8BitTables && d.actualTableLog <= 8 { - return d.decompress4X8bit(dst, src) - } - - var br [4]bitReaderShifted - start := 6 - for i := 0; i < 3; i++ { - length := int(src[i*2]) | (int(src[i*2+1]) << 8) - if start+length >= len(src) { - return nil, errors.New("truncated input (or invalid offset)") - } - err := br[i].init(src[start : start+length]) - if err != nil { - return nil, err - } - start += length - } - err := br[3].init(src[start:]) - if err != nil { - return nil, err - } - - // destination, offset to match first output - dstSize := cap(dst) - dst = dst[:dstSize] - out := dst - dstEvery := (dstSize + 3) / 4 - - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - single := d.dt.single[:tlSize] - - // Use temp table to avoid bound checks/append penalty. - var buf [256]byte - var off uint8 - var decoded int - - // Decode 2 values from each decoder/loop. - const bufoff = 256 / 4 - for { - if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { - break - } - - { - const stream = 0 - const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - val := br[stream].peekBitsFast(d.actualTableLog) - v := single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream] = uint8(v.entry >> 8) - - val2 := br[stream2].peekBitsFast(d.actualTableLog) - v2 := single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2] = uint8(v2.entry >> 8) - - val = br[stream].peekBitsFast(d.actualTableLog) - v = single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream+1] = uint8(v.entry >> 8) - - val2 = br[stream2].peekBitsFast(d.actualTableLog) - v2 = single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) - } - - { - const stream = 2 - const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - val := br[stream].peekBitsFast(d.actualTableLog) - v := single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream] = uint8(v.entry >> 8) - - val2 := br[stream2].peekBitsFast(d.actualTableLog) - v2 := single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2] = uint8(v2.entry >> 8) - - val = br[stream].peekBitsFast(d.actualTableLog) - v = single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream+1] = uint8(v.entry >> 8) - - val2 = br[stream2].peekBitsFast(d.actualTableLog) - v2 = single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) - } - - off += 2 - - if off == bufoff { - if bufoff > dstEvery { - return nil, errors.New("corruption detected: stream overrun 1") - } - copy(out, buf[:bufoff]) - copy(out[dstEvery:], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) - off = 0 - out = out[bufoff:] - decoded += 256 - // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { - return nil, errors.New("corruption detected: stream overrun 2") - } - } - } - if off > 0 { - ioff := int(off) - if len(out) < dstEvery*3+ioff { - return nil, errors.New("corruption detected: stream overrun 3") - } - copy(out, buf[:off]) - copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) - decoded += int(off) * 4 - out = out[off:] - } - - // Decode remaining. - for i := range br { - offset := dstEvery * i - br := &br[i] - bitsLeft := br.off*8 + uint(64-br.bitsRead) - for bitsLeft > 0 { - br.fill() - if false && br.bitsRead >= 32 { - if br.off >= 4 { - v := br.in[br.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - br.value = (br.value << 32) | uint64(low) - br.bitsRead -= 32 - br.off -= 4 - } else { - for br.off > 0 { - br.value = (br.value << 8) | uint64(br.in[br.off-1]) - br.bitsRead -= 8 - br.off-- - } - } - } - // end inline... - if offset >= len(out) { - return nil, errors.New("corruption detected: stream overrun 4") - } - - // Read value and increment offset. - val := br.peekBitsFast(d.actualTableLog) - v := single[val&tlMask].entry - nBits := uint8(v) - br.advance(nBits) - bitsLeft -= uint(nBits) - out[offset] = uint8(v >> 8) - offset++ - } - decoded += offset - dstEvery*i - err = br.close() - if err != nil { - return nil, err - } - } - if dstSize != decoded { - return nil, errors.New("corruption detected: short output block") - } - return dst, nil -} - // Decompress4X will decompress a 4X encoded stream. // The length of the supplied input must match the end of a block exactly. // The *capacity* of the dst slice must match the destination size of @@ -688,19 +758,18 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - shift := (8 - d.actualTableLog) & 7 + shift := (56 + (8 - d.actualTableLog)) & 63 const tlSize = 1 << 8 - const tlMask = tlSize - 1 single := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + buf := d.buffer() var off uint8 var decoded int // Decode 4 values from each decoder/loop. - const bufoff = 256 / 4 + const bufoff = 256 for { if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { break @@ -710,96 +779,109 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { // Interleave 2 decodes. const stream = 0 const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } { const stream = 2 const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } off += 4 - if off == bufoff { + if off == 0 { if bufoff > dstEvery { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } - copy(out, buf[:bufoff]) - copy(out[dstEvery:], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) - off = 0 + copy(out, buf[0][:]) + copy(out[dstEvery:], buf[1][:]) + copy(out[dstEvery*2:], buf[2][:]) + copy(out[dstEvery*3:], buf[3][:]) out = out[bufoff:] - decoded += 256 + decoded += bufoff * 4 // There must at least be 3 buffers left. if len(out) < dstEvery*3 { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 2") } } @@ -807,23 +889,31 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { if off > 0 { ioff := int(off) if len(out) < dstEvery*3+ioff { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 3") } - copy(out, buf[:off]) - copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + copy(out, buf[0][:off]) + copy(out[dstEvery:], buf[1][:off]) + copy(out[dstEvery*2:], buf[2][:off]) + copy(out[dstEvery*3:], buf[3][:off]) decoded += int(off) * 4 out = out[off:] } // Decode remaining. + // Decode remaining. + remainBytes := dstEvery - (decoded / 4) for i := range br { offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } br := &br[i] - bitsLeft := int(br.off*8) + int(64-br.bitsRead) + bitsLeft := br.remaining() for bitsLeft > 0 { if br.finished() { + d.bufs.Put(buf) return nil, io.ErrUnexpectedEOF } if br.bitsRead >= 56 { @@ -843,24 +933,31 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { } } // end inline... - if offset >= len(out) { + if offset >= endsAt { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 4") } // Read value and increment offset. - v := single[br.peekByteFast()>>shift].entry + v := single[uint8(br.value>>shift)].entry nBits := uint8(v) br.advance(nBits) - bitsLeft -= int(nBits) + bitsLeft -= uint(nBits) out[offset] = uint8(v >> 8) offset++ } + if offset != endsAt { + d.bufs.Put(buf) + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } decoded += offset - dstEvery*i err = br.close() if err != nil { + d.bufs.Put(buf) return nil, err } } + d.bufs.Put(buf) if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } @@ -896,18 +993,18 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - const shift = 0 + const shift = 56 const tlSize = 1 << 8 const tlMask = tlSize - 1 single := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + buf := d.buffer() var off uint8 var decoded int // Decode 4 values from each decoder/loop. - const bufoff = 256 / 4 + const bufoff = 256 for { if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { break @@ -917,96 +1014,109 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { // Interleave 2 decodes. const stream = 0 const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } { const stream = 2 const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } off += 4 - if off == bufoff { + if off == 0 { if bufoff > dstEvery { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } - copy(out, buf[:bufoff]) - copy(out[dstEvery:], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) - off = 0 + copy(out, buf[0][:]) + copy(out[dstEvery:], buf[1][:]) + copy(out[dstEvery*2:], buf[2][:]) + copy(out[dstEvery*3:], buf[3][:]) out = out[bufoff:] - decoded += 256 + decoded += bufoff * 4 // There must at least be 3 buffers left. if len(out) < dstEvery*3 { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 2") } } @@ -1016,21 +1126,27 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { if len(out) < dstEvery*3+ioff { return nil, errors.New("corruption detected: stream overrun 3") } - copy(out, buf[:off]) - copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + copy(out, buf[0][:off]) + copy(out[dstEvery:], buf[1][:off]) + copy(out[dstEvery*2:], buf[2][:off]) + copy(out[dstEvery*3:], buf[3][:off]) decoded += int(off) * 4 out = out[off:] } // Decode remaining. + remainBytes := dstEvery - (decoded / 4) for i := range br { offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } br := &br[i] - bitsLeft := int(br.off*8) + int(64-br.bitsRead) + bitsLeft := br.remaining() for bitsLeft > 0 { if br.finished() { + d.bufs.Put(buf) return nil, io.ErrUnexpectedEOF } if br.bitsRead >= 56 { @@ -1050,24 +1166,32 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { } } // end inline... - if offset >= len(out) { + if offset >= endsAt { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 4") } // Read value and increment offset. - v := single[br.peekByteFast()>>shift].entry + v := single[br.peekByteFast()].entry nBits := uint8(v) br.advance(nBits) - bitsLeft -= int(nBits) + bitsLeft -= uint(nBits) out[offset] = uint8(v >> 8) offset++ } + if offset != endsAt { + d.bufs.Put(buf) + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } + decoded += offset - dstEvery*i err = br.close() if err != nil { + d.bufs.Put(buf) return nil, err } } + d.bufs.Put(buf) if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go new file mode 100644 index 0000000..3415e5d --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go @@ -0,0 +1,148 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// This file contains the specialisation of Decoder.Decompress4X +// that uses an asm implementation of its main loop. +package huff0 + +import ( + "errors" + "fmt" +) + +// decompress4x_main_loop_x86 is an x86 assembler implementation +// of Decompress4X when tablelog > 8. +//go:noescape +func decompress4x_main_loop_amd64(ctx *decompress4xContext) + +// decompress4x_8b_loop_x86 is an x86 assembler implementation +// of Decompress4X when tablelog <= 8 which decodes 4 entries +// per loop. +//go:noescape +func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) + +// fallback8BitSize is the size where using Go version is faster. +const fallback8BitSize = 800 + +type decompress4xContext struct { + pbr0 *bitReaderShifted + pbr1 *bitReaderShifted + pbr2 *bitReaderShifted + pbr3 *bitReaderShifted + peekBits uint8 + out *byte + dstEvery int + tbl *dEntrySingle + decoded int + limit *byte +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + if len(src) < 6+(4*1) { + return nil, errors.New("input too small") + } + + use8BitTables := d.actualTableLog <= 8 + if cap(dst) < fallback8BitSize && use8BitTables { + return d.decompress4X8bit(dst, src) + } + + var br [4]bitReaderShifted + // Decode "jump table" + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + var decoded int + + if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) { + ctx := decompress4xContext{ + pbr0: &br[0], + pbr1: &br[1], + pbr2: &br[2], + pbr3: &br[3], + peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() + out: &out[0], + dstEvery: dstEvery, + tbl: &single[0], + limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last. + } + if use8BitTables { + decompress4x_8b_main_loop_amd64(&ctx) + } else { + decompress4x_main_loop_amd64(&ctx) + } + + decoded = ctx.decoded + out = out[decoded/4:] + } + + // Decode remaining. + remainBytes := dstEvery - (decoded / 4) + for i := range br { + offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } + br := &br[i] + bitsLeft := br.remaining() + for bitsLeft > 0 { + br.fill() + if offset >= endsAt { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + val := br.peekBitsFast(d.actualTableLog) + v := single[val&tlMask].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= uint(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + if offset != endsAt { + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s new file mode 100644 index 0000000..06287f5 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s @@ -0,0 +1,662 @@ +// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT. + +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// func decompress4x_main_loop_amd64(ctx *decompress4xContext) +TEXT ·decompress4x_main_loop_amd64(SB), $8-8 + XORQ DX, DX + + // Preload values + MOVQ ctx+0(FP), AX + MOVBQZX 32(AX), SI + MOVQ 40(AX), DI + MOVQ DI, BX + MOVQ 72(AX), CX + MOVQ CX, (SP) + MOVQ 48(AX), R8 + MOVQ 56(AX), R9 + MOVQ (AX), R10 + MOVQ 8(AX), R11 + MOVQ 16(AX), R12 + MOVQ 24(AX), R13 + + // Main loop +main_loop: + MOVQ BX, DI + CMPQ DI, (SP) + SETGE DL + + // br0.fillFast32() + MOVQ 32(R10), R14 + MOVBQZX 40(R10), R15 + CMPQ R15, $0x20 + JBE skip_fill0 + MOVQ 24(R10), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R10), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R10) + ORQ BP, R14 + + // exhausted = exhausted || (br0.off < 4) + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL + +skip_fill0: + // val0 := br0.peekTopBits(peekBits) + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP + + // v0 := table[val0&mask] + MOVW (R9)(BP*2), CX + + // br0.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 + + // val1 := br0.peekTopBits(peekBits) + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP + + // v1 := table[val1&mask] + MOVW (R9)(BP*2), CX + + // br0.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) + + // update the bitrader reader structure + MOVQ R14, 32(R10) + MOVB R15, 40(R10) + ADDQ R8, DI + + // br1.fillFast32() + MOVQ 32(R11), R14 + MOVBQZX 40(R11), R15 + CMPQ R15, $0x20 + JBE skip_fill1 + MOVQ 24(R11), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R11), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R11) + ORQ BP, R14 + + // exhausted = exhausted || (br1.off < 4) + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1: + // val0 := br1.peekTopBits(peekBits) + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP + + // v0 := table[val0&mask] + MOVW (R9)(BP*2), CX + + // br1.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 + + // val1 := br1.peekTopBits(peekBits) + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP + + // v1 := table[val1&mask] + MOVW (R9)(BP*2), CX + + // br1.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) + + // update the bitrader reader structure + MOVQ R14, 32(R11) + MOVB R15, 40(R11) + ADDQ R8, DI + + // br2.fillFast32() + MOVQ 32(R12), R14 + MOVBQZX 40(R12), R15 + CMPQ R15, $0x20 + JBE skip_fill2 + MOVQ 24(R12), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R12), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R12) + ORQ BP, R14 + + // exhausted = exhausted || (br2.off < 4) + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL + +skip_fill2: + // val0 := br2.peekTopBits(peekBits) + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP + + // v0 := table[val0&mask] + MOVW (R9)(BP*2), CX + + // br2.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 + + // val1 := br2.peekTopBits(peekBits) + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP + + // v1 := table[val1&mask] + MOVW (R9)(BP*2), CX + + // br2.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) + + // update the bitrader reader structure + MOVQ R14, 32(R12) + MOVB R15, 40(R12) + ADDQ R8, DI + + // br3.fillFast32() + MOVQ 32(R13), R14 + MOVBQZX 40(R13), R15 + CMPQ R15, $0x20 + JBE skip_fill3 + MOVQ 24(R13), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R13), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R13) + ORQ BP, R14 + + // exhausted = exhausted || (br3.off < 4) + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL + +skip_fill3: + // val0 := br3.peekTopBits(peekBits) + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP + + // v0 := table[val0&mask] + MOVW (R9)(BP*2), CX + + // br3.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 + + // val1 := br3.peekTopBits(peekBits) + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP + + // v1 := table[val1&mask] + MOVW (R9)(BP*2), CX + + // br3.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) + + // update the bitrader reader structure + MOVQ R14, 32(R13) + MOVB R15, 40(R13) + ADDQ $0x02, BX + TESTB DL, DL + JZ main_loop + MOVQ ctx+0(FP), AX + MOVQ 40(AX), CX + MOVQ BX, DX + SUBQ CX, DX + SHLQ $0x02, DX + MOVQ DX, 64(AX) + RET + +// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) +TEXT ·decompress4x_8b_main_loop_amd64(SB), $16-8 + XORQ DX, DX + + // Preload values + MOVQ ctx+0(FP), CX + MOVBQZX 32(CX), BX + MOVQ 40(CX), SI + MOVQ SI, (SP) + MOVQ 72(CX), DX + MOVQ DX, 8(SP) + MOVQ 48(CX), DI + MOVQ 56(CX), R8 + MOVQ (CX), R9 + MOVQ 8(CX), R10 + MOVQ 16(CX), R11 + MOVQ 24(CX), R12 + + // Main loop +main_loop: + MOVQ (SP), SI + CMPQ SI, 8(SP) + SETGE DL + + // br1000.fillFast32() + MOVQ 32(R9), R13 + MOVBQZX 40(R9), R14 + CMPQ R14, $0x20 + JBE skip_fill1000 + MOVQ 24(R9), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R9), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R9) + ORQ BP, R13 + + // exhausted = exhausted || (br1000.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1000: + // val0 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br0.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + + // val1 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br0.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br0.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br0.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R9) + MOVB R14, 40(R9) + ADDQ DI, SI + + // br1001.fillFast32() + MOVQ 32(R10), R13 + MOVBQZX 40(R10), R14 + CMPQ R14, $0x20 + JBE skip_fill1001 + MOVQ 24(R10), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R10), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R10) + ORQ BP, R13 + + // exhausted = exhausted || (br1001.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1001: + // val0 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br1.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + + // val1 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br1.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br1.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br1.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R10) + MOVB R14, 40(R10) + ADDQ DI, SI + + // br1002.fillFast32() + MOVQ 32(R11), R13 + MOVBQZX 40(R11), R14 + CMPQ R14, $0x20 + JBE skip_fill1002 + MOVQ 24(R11), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R11), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R11) + ORQ BP, R13 + + // exhausted = exhausted || (br1002.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1002: + // val0 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br2.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + + // val1 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br2.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br2.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br2.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R11) + MOVB R14, 40(R11) + ADDQ DI, SI + + // br1003.fillFast32() + MOVQ 32(R12), R13 + MOVBQZX 40(R12), R14 + CMPQ R14, $0x20 + JBE skip_fill1003 + MOVQ 24(R12), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R12), BP + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R12) + ORQ BP, R13 + + // exhausted = exhausted || (br1003.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1003: + // val0 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br3.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + + // val1 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br3.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br3.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br3.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R12) + MOVB R14, 40(R12) + ADDQ $0x04, (SP) + TESTB DL, DL + JZ main_loop + MOVQ ctx+0(FP), AX + MOVQ 40(AX), CX + MOVQ (SP), DX + SUBQ CX, DX + SHLQ $0x02, DX + MOVQ DX, 64(AX) + RET diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go new file mode 100644 index 0000000..126b4d6 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go @@ -0,0 +1,193 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +// This file contains a generic implementation of Decoder.Decompress4X. +package huff0 + +import ( + "errors" + "fmt" +) + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + if len(src) < 6+(4*1) { + return nil, errors.New("input too small") + } + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress4X8bit(dst, src) + } + + var br [4]bitReaderShifted + // Decode "jump table" + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + buf := d.buffer() + var off uint8 + var decoded int + + // Decode 2 values from each decoder/loop. + const bufoff = 256 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break + } + + { + const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) + v := single[val&tlMask] + v2 := single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off] = uint8(v.entry >> 8) + buf[stream2][off] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + v2 = single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off+1] = uint8(v.entry >> 8) + buf[stream2][off+1] = uint8(v2.entry >> 8) + } + + { + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) + v := single[val&tlMask] + v2 := single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off] = uint8(v.entry >> 8) + buf[stream2][off] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + v2 = single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off+1] = uint8(v.entry >> 8) + buf[stream2][off+1] = uint8(v2.entry >> 8) + } + + off += 2 + + if off == 0 { + if bufoff > dstEvery { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(out, buf[0][:]) + copy(out[dstEvery:], buf[1][:]) + copy(out[dstEvery*2:], buf[2][:]) + copy(out[dstEvery*3:], buf[3][:]) + out = out[bufoff:] + decoded += bufoff * 4 + // There must at least be 3 buffers left. + if len(out) < dstEvery*3 { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[0][:off]) + copy(out[dstEvery:], buf[1][:off]) + copy(out[dstEvery*2:], buf[2][:off]) + copy(out[dstEvery*3:], buf[3][:off]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. + remainBytes := dstEvery - (decoded / 4) + for i := range br { + offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } + br := &br[i] + bitsLeft := br.remaining() + for bitsLeft > 0 { + br.fill() + if offset >= endsAt { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + val := br.peekBitsFast(d.actualTableLog) + v := single[val&tlMask].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= uint(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + if offset != endsAt { + d.bufs.Put(buf) + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + d.bufs.Put(buf) + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index 177d6c4..e8ad17a 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -8,6 +8,7 @@ import ( "fmt" "math" "math/bits" + "sync" "github.com/klauspost/compress/fse" ) @@ -55,6 +56,9 @@ const ( // ReusePolicyNone will disable re-use of tables. // This is slightly faster than ReusePolicyAllow but may produce larger output. ReusePolicyNone + + // ReusePolicyMust must allow reuse and produce smaller output. + ReusePolicyMust ) type Scratch struct { @@ -113,9 +117,20 @@ type Scratch struct { nodes []nodeElt tmpOut [4][]byte fse *fse.Scratch + decPool sync.Pool // *[4][256]byte buffers. huffWeight [maxSymbolValue + 1]byte } +// TransferCTable will transfer the previously used compression table. +func (s *Scratch) TransferCTable(src *Scratch) { + if cap(s.prevTable) < len(src.prevTable) { + s.prevTable = make(cTable, 0, maxSymbolValue+1) + } + s.prevTable = s.prevTable[:len(src.prevTable)] + copy(s.prevTable, src.prevTable) + s.prevTableLog = src.prevTableLog +} + func (s *Scratch) prepare(in []byte) (*Scratch, error) { if len(in) > BlockSizeMax { return nil, ErrTooBig @@ -232,6 +247,68 @@ func (c cTable) write(s *Scratch) error { return nil } +func (c cTable) estTableSize(s *Scratch) (sz int, err error) { + var ( + // precomputed conversion table + bitsToWeight [tableLogMax + 1]byte + huffLog = s.actualTableLog + // last weight is not saved. + maxSymbolValue = uint8(s.symbolLen - 1) + huffWeight = s.huffWeight[:256] + ) + const ( + maxFSETableLog = 6 + ) + // convert to weight + bitsToWeight[0] = 0 + for n := uint8(1); n < huffLog+1; n++ { + bitsToWeight[n] = huffLog + 1 - n + } + + // Acquire histogram for FSE. + hist := s.fse.Histogram() + hist = hist[:256] + for i := range hist[:16] { + hist[i] = 0 + } + for n := uint8(0); n < maxSymbolValue; n++ { + v := bitsToWeight[c[n].nBits] & 15 + huffWeight[n] = v + hist[v]++ + } + + // FSE compress if feasible. + if maxSymbolValue >= 2 { + huffMaxCnt := uint32(0) + huffMax := uint8(0) + for i, v := range hist[:16] { + if v == 0 { + continue + } + huffMax = byte(i) + if v > huffMaxCnt { + huffMaxCnt = v + } + } + s.fse.HistogramFinished(huffMax, int(huffMaxCnt)) + s.fse.TableLog = maxFSETableLog + b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse) + if err == nil && len(b) < int(s.symbolLen>>1) { + sz += 1 + len(b) + return sz, nil + } + // Unable to compress (RLE/uncompressible) + } + // write raw values as 4-bits (max : 15) + if maxSymbolValue > (256 - 128) { + // should not happen : likely means source cannot be compressed + return 0, ErrIncompressible + } + // special case, pack weights 4 bits/weight. + sz += 1 + int(maxSymbolValue/2) + return sz, nil +} + // estimateSize returns the estimated size in bytes of the input represented in the // histogram supplied. func (c cTable) estimateSize(hist []uint32) int { diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go new file mode 100644 index 0000000..3954c51 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go @@ -0,0 +1,34 @@ +// Package cpuinfo gives runtime info about the current CPU. +// +// This is a very limited module meant for use internally +// in this project. For more versatile solution check +// https://github.com/klauspost/cpuid. +package cpuinfo + +// HasBMI1 checks whether an x86 CPU supports the BMI1 extension. +func HasBMI1() bool { + return hasBMI1 +} + +// HasBMI2 checks whether an x86 CPU supports the BMI2 extension. +func HasBMI2() bool { + return hasBMI2 +} + +// DisableBMI2 will disable BMI2, for testing purposes. +// Call returned function to restore previous state. +func DisableBMI2() func() { + old := hasBMI2 + hasBMI2 = false + return func() { + hasBMI2 = old + } +} + +// HasBMI checks whether an x86 CPU supports both BMI1 and BMI2 extensions. +func HasBMI() bool { + return HasBMI1() && HasBMI2() +} + +var hasBMI1 bool +var hasBMI2 bool diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go new file mode 100644 index 0000000..e802579 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go @@ -0,0 +1,11 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package cpuinfo + +// go:noescape +func x86extensions() (bmi1, bmi2 bool) + +func init() { + hasBMI1, hasBMI2 = x86extensions() +} diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s new file mode 100644 index 0000000..4465fbe --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s @@ -0,0 +1,36 @@ +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" +#include "funcdata.h" +#include "go_asm.h" + +TEXT ·x86extensions(SB), NOSPLIT, $0 + // 1. determine max EAX value + XORQ AX, AX + CPUID + + CMPQ AX, $7 + JB unsupported + + // 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction" + MOVQ $7, AX + MOVQ $0, CX + CPUID + + BTQ $3, BX // bit 3 = BMI1 + SETCS AL + + BTQ $8, BX // bit 8 = BMI2 + SETCS AH + + MOVB AL, bmi1+0(FP) + MOVB AH, bmi2+1(FP) + RET + +unsupported: + XORQ AX, AX + MOVB AL, bmi1+0(FP) + MOVB AL, bmi2+1(FP) + RET diff --git a/vendor/github.com/klauspost/compress/snappy/LICENSE b/vendor/github.com/klauspost/compress/internal/snapref/LICENSE similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/LICENSE rename to vendor/github.com/klauspost/compress/internal/snapref/LICENSE diff --git a/vendor/github.com/klauspost/compress/snappy/decode.go b/vendor/github.com/klauspost/compress/internal/snapref/decode.go similarity index 85% rename from vendor/github.com/klauspost/compress/snappy/decode.go rename to vendor/github.com/klauspost/compress/internal/snapref/decode.go index 72efb03..40796a4 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/decode.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package snappy +package snapref import ( "encoding/binary" @@ -52,6 +52,8 @@ const ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. func Decode(dst, src []byte) ([]byte, error) { dLen, s, err := decodedLen(src) if err != nil { @@ -83,6 +85,8 @@ func NewReader(r io.Reader) *Reader { } // Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. type Reader struct { r io.Reader err error @@ -114,32 +118,23 @@ func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { return true } -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - for { - if r.i < r.j { - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil - } +func (r *Reader) fill() error { + for r.i >= r.j { if !r.readFull(r.buf[:4], true) { - return 0, r.err + return r.err } chunkType := r.buf[0] if !r.readHeader { if chunkType != chunkTypeStreamIdentifier { r.err = ErrCorrupt - return 0, r.err + return r.err } r.readHeader = true } chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 if chunkLen > len(r.buf) { r.err = ErrUnsupported - return 0, r.err + return r.err } // The chunk types are specified at @@ -149,11 +144,11 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.2. Compressed data (chunk type 0x00). if chunkLen < checksumSize { r.err = ErrCorrupt - return 0, r.err + return r.err } buf := r.buf[:chunkLen] if !r.readFull(buf, false) { - return 0, r.err + return r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 buf = buf[checksumSize:] @@ -161,19 +156,19 @@ func (r *Reader) Read(p []byte) (int, error) { n, err := DecodedLen(buf) if err != nil { r.err = err - return 0, r.err + return r.err } if n > len(r.decoded) { r.err = ErrCorrupt - return 0, r.err + return r.err } if _, err := Decode(r.decoded, buf); err != nil { r.err = err - return 0, r.err + return r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt - return 0, r.err + return r.err } r.i, r.j = 0, n continue @@ -182,25 +177,25 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.3. Uncompressed data (chunk type 0x01). if chunkLen < checksumSize { r.err = ErrCorrupt - return 0, r.err + return r.err } buf := r.buf[:checksumSize] if !r.readFull(buf, false) { - return 0, r.err + return r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 // Read directly into r.decoded instead of via r.buf. n := chunkLen - checksumSize if n > len(r.decoded) { r.err = ErrCorrupt - return 0, r.err + return r.err } if !r.readFull(r.decoded[:n], false) { - return 0, r.err + return r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt - return 0, r.err + return r.err } r.i, r.j = 0, n continue @@ -209,15 +204,15 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.1. Stream identifier (chunk type 0xff). if chunkLen != len(magicBody) { r.err = ErrCorrupt - return 0, r.err + return r.err } if !r.readFull(r.buf[:len(magicBody)], false) { - return 0, r.err + return r.err } for i := 0; i < len(magicBody); i++ { if r.buf[i] != magicBody[i] { r.err = ErrCorrupt - return 0, r.err + return r.err } } continue @@ -226,12 +221,44 @@ func (r *Reader) Read(p []byte) (int, error) { if chunkType <= 0x7f { // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). r.err = ErrUnsupported - return 0, r.err + return r.err } // Section 4.4 Padding (chunk type 0xfe). // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). if !r.readFull(r.buf[:chunkLen], false) { - return 0, r.err + return r.err } } + + return nil +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil +} + +// ReadByte satisfies the io.ByteReader interface. +func (r *Reader) ReadByte() (byte, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + c := r.decoded[r.i] + r.i++ + return c, nil } diff --git a/vendor/github.com/klauspost/compress/snappy/decode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go similarity index 97% rename from vendor/github.com/klauspost/compress/snappy/decode_other.go rename to vendor/github.com/klauspost/compress/internal/snapref/decode_other.go index 94a96c5..77395a6 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_other.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go @@ -2,9 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm - -package snappy +package snapref // decode writes the decoding of src to dst. It assumes that the varint-encoded // length of the decompressed bytes has already been read, and that len(dst) @@ -87,7 +85,7 @@ func decode(dst, src []byte) int { } // Copy from an earlier sub-slice of dst to a later sub-slice. // If no overlap, use the built-in copy: - if offset > length { + if offset >= length { copy(dst[d:d+length], dst[d-offset:]) d += length continue diff --git a/vendor/github.com/klauspost/compress/snappy/encode.go b/vendor/github.com/klauspost/compress/internal/snapref/encode.go similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/encode.go rename to vendor/github.com/klauspost/compress/internal/snapref/encode.go index 8d393e9..13c6040 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/encode.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package snappy +package snapref import ( "encoding/binary" @@ -15,6 +15,8 @@ import ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. func Encode(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) @@ -139,6 +141,8 @@ func NewBufferedWriter(w io.Writer) *Writer { } // Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. type Writer struct { w io.Writer err error diff --git a/vendor/github.com/klauspost/compress/snappy/encode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go similarity index 99% rename from vendor/github.com/klauspost/compress/snappy/encode_other.go rename to vendor/github.com/klauspost/compress/internal/snapref/encode_other.go index dbcae90..511bba6 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode_other.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go @@ -2,9 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm - -package snappy +package snapref func load32(b []byte, i int) uint32 { b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/klauspost/compress/internal/snapref/snappy.go similarity index 97% rename from vendor/github.com/klauspost/compress/snappy/snappy.go rename to vendor/github.com/klauspost/compress/internal/snapref/snappy.go index 74a3668..34d01f4 100644 --- a/vendor/github.com/klauspost/compress/snappy/snappy.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/snappy.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package snappy implements the Snappy compression format. It aims for very +// Package snapref implements the Snappy compression format. It aims for very // high speeds and reasonable compression. // // There are actually two Snappy formats: block and stream. They are related, @@ -17,7 +17,7 @@ // // The canonical, C++ implementation is at https://github.com/google/snappy and // it only implements the block format. -package snappy +package snapref import ( "hash/crc32" diff --git a/vendor/github.com/klauspost/compress/s2sx.mod b/vendor/github.com/klauspost/compress/s2sx.mod new file mode 100644 index 0000000..2263853 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2sx.mod @@ -0,0 +1,4 @@ +module github.com/klauspost/compress + +go 1.16 + diff --git a/vendor/github.com/klauspost/compress/s2sx.sum b/vendor/github.com/klauspost/compress/s2sx.sum new file mode 100644 index 0000000..e69de29 diff --git a/vendor/github.com/klauspost/compress/snappy/.gitignore b/vendor/github.com/klauspost/compress/snappy/.gitignore deleted file mode 100644 index 042091d..0000000 --- a/vendor/github.com/klauspost/compress/snappy/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ -cmd/snappytool/snappytool -testdata/bench - -# These explicitly listed benchmark data files are for an obsolete version of -# snappy_test.go. -testdata/alice29.txt -testdata/asyoulik.txt -testdata/fireworks.jpeg -testdata/geo.protodata -testdata/html -testdata/html_x_4 -testdata/kppkn.gtb -testdata/lcet10.txt -testdata/paper-100k.pdf -testdata/plrabn12.txt -testdata/urls.10K diff --git a/vendor/github.com/klauspost/compress/snappy/AUTHORS b/vendor/github.com/klauspost/compress/snappy/AUTHORS deleted file mode 100644 index bcfa195..0000000 --- a/vendor/github.com/klauspost/compress/snappy/AUTHORS +++ /dev/null @@ -1,15 +0,0 @@ -# This is the official list of Snappy-Go authors for copyright purposes. -# This file is distinct from the CONTRIBUTORS files. -# See the latter for an explanation. - -# Names should be added to this file as -# Name or Organization -# The email address is not required for organizations. - -# Please keep the list sorted. - -Damian Gryski -Google Inc. -Jan Mercl <0xjnml@gmail.com> -Rodolfo Carvalho -Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS deleted file mode 100644 index 931ae31..0000000 --- a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS +++ /dev/null @@ -1,37 +0,0 @@ -# This is the official list of people who can contribute -# (and typically have contributed) code to the Snappy-Go repository. -# The AUTHORS file lists the copyright holders; this file -# lists people. For example, Google employees are listed here -# but not in AUTHORS, because Google holds the copyright. -# -# The submission process automatically checks to make sure -# that people submitting code are listed in this file (by email address). -# -# Names should be added to this file only after verifying that -# the individual or the individual's organization has agreed to -# the appropriate Contributor License Agreement, found here: -# -# http://code.google.com/legal/individual-cla-v1.0.html -# http://code.google.com/legal/corporate-cla-v1.0.html -# -# The agreement for individuals can be filled out on the web. -# -# When adding J Random Contributor's name to this file, -# either J's name or J's organization's name should be -# added to the AUTHORS file, depending on whether the -# individual or corporate CLA was used. - -# Names should be added to this file like so: -# Name - -# Please keep the list sorted. - -Damian Gryski -Jan Mercl <0xjnml@gmail.com> -Kai Backman -Marc-Antoine Ruel -Nigel Tao -Rob Pike -Rodolfo Carvalho -Russ Cox -Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/README b/vendor/github.com/klauspost/compress/snappy/README deleted file mode 100644 index cea1287..0000000 --- a/vendor/github.com/klauspost/compress/snappy/README +++ /dev/null @@ -1,107 +0,0 @@ -The Snappy compression format in the Go programming language. - -To download and install from source: -$ go get github.com/golang/snappy - -Unless otherwise noted, the Snappy-Go source files are distributed -under the BSD-style license found in the LICENSE file. - - - -Benchmarks. - -The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten -or so files, the same set used by the C++ Snappy code (github.com/google/snappy -and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ -3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: - -"go test -test.bench=." - -_UFlat0-8 2.19GB/s ± 0% html -_UFlat1-8 1.41GB/s ± 0% urls -_UFlat2-8 23.5GB/s ± 2% jpg -_UFlat3-8 1.91GB/s ± 0% jpg_200 -_UFlat4-8 14.0GB/s ± 1% pdf -_UFlat5-8 1.97GB/s ± 0% html4 -_UFlat6-8 814MB/s ± 0% txt1 -_UFlat7-8 785MB/s ± 0% txt2 -_UFlat8-8 857MB/s ± 0% txt3 -_UFlat9-8 719MB/s ± 1% txt4 -_UFlat10-8 2.84GB/s ± 0% pb -_UFlat11-8 1.05GB/s ± 0% gaviota - -_ZFlat0-8 1.04GB/s ± 0% html -_ZFlat1-8 534MB/s ± 0% urls -_ZFlat2-8 15.7GB/s ± 1% jpg -_ZFlat3-8 740MB/s ± 3% jpg_200 -_ZFlat4-8 9.20GB/s ± 1% pdf -_ZFlat5-8 991MB/s ± 0% html4 -_ZFlat6-8 379MB/s ± 0% txt1 -_ZFlat7-8 352MB/s ± 0% txt2 -_ZFlat8-8 396MB/s ± 1% txt3 -_ZFlat9-8 327MB/s ± 1% txt4 -_ZFlat10-8 1.33GB/s ± 1% pb -_ZFlat11-8 605MB/s ± 1% gaviota - - - -"go test -test.bench=. -tags=noasm" - -_UFlat0-8 621MB/s ± 2% html -_UFlat1-8 494MB/s ± 1% urls -_UFlat2-8 23.2GB/s ± 1% jpg -_UFlat3-8 1.12GB/s ± 1% jpg_200 -_UFlat4-8 4.35GB/s ± 1% pdf -_UFlat5-8 609MB/s ± 0% html4 -_UFlat6-8 296MB/s ± 0% txt1 -_UFlat7-8 288MB/s ± 0% txt2 -_UFlat8-8 309MB/s ± 1% txt3 -_UFlat9-8 280MB/s ± 1% txt4 -_UFlat10-8 753MB/s ± 0% pb -_UFlat11-8 400MB/s ± 0% gaviota - -_ZFlat0-8 409MB/s ± 1% html -_ZFlat1-8 250MB/s ± 1% urls -_ZFlat2-8 12.3GB/s ± 1% jpg -_ZFlat3-8 132MB/s ± 0% jpg_200 -_ZFlat4-8 2.92GB/s ± 0% pdf -_ZFlat5-8 405MB/s ± 1% html4 -_ZFlat6-8 179MB/s ± 1% txt1 -_ZFlat7-8 170MB/s ± 1% txt2 -_ZFlat8-8 189MB/s ± 1% txt3 -_ZFlat9-8 164MB/s ± 1% txt4 -_ZFlat10-8 479MB/s ± 1% pb -_ZFlat11-8 270MB/s ± 1% gaviota - - - -For comparison (Go's encoded output is byte-for-byte identical to C++'s), here -are the numbers from C++ Snappy's - -make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log - -BM_UFlat/0 2.4GB/s html -BM_UFlat/1 1.4GB/s urls -BM_UFlat/2 21.8GB/s jpg -BM_UFlat/3 1.5GB/s jpg_200 -BM_UFlat/4 13.3GB/s pdf -BM_UFlat/5 2.1GB/s html4 -BM_UFlat/6 1.0GB/s txt1 -BM_UFlat/7 959.4MB/s txt2 -BM_UFlat/8 1.0GB/s txt3 -BM_UFlat/9 864.5MB/s txt4 -BM_UFlat/10 2.9GB/s pb -BM_UFlat/11 1.2GB/s gaviota - -BM_ZFlat/0 944.3MB/s html (22.31 %) -BM_ZFlat/1 501.6MB/s urls (47.78 %) -BM_ZFlat/2 14.3GB/s jpg (99.95 %) -BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) -BM_ZFlat/4 8.3GB/s pdf (83.30 %) -BM_ZFlat/5 903.5MB/s html4 (22.52 %) -BM_ZFlat/6 336.0MB/s txt1 (57.88 %) -BM_ZFlat/7 312.3MB/s txt2 (61.91 %) -BM_ZFlat/8 353.1MB/s txt3 (54.99 %) -BM_ZFlat/9 289.9MB/s txt4 (66.26 %) -BM_ZFlat/10 1.2GB/s pb (19.68 %) -BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go b/vendor/github.com/klauspost/compress/snappy/encode_amd64.go deleted file mode 100644 index 150d91b..0000000 --- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -package snappy - -// emitLiteral has the same semantics as in encode_other.go. -// -//go:noescape -func emitLiteral(dst, lit []byte) int - -// emitCopy has the same semantics as in encode_other.go. -// -//go:noescape -func emitCopy(dst []byte, offset, length int) int - -// extendMatch has the same semantics as in encode_other.go. -// -//go:noescape -func extendMatch(src []byte, i, j int) int - -// encodeBlock has the same semantics as in encode_other.go. -// -//go:noescape -func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s b/vendor/github.com/klauspost/compress/snappy/encode_amd64.s deleted file mode 100644 index adfd979..0000000 --- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s +++ /dev/null @@ -1,730 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a -// Go toolchain regression. See https://github.com/golang/go/issues/15426 and -// https://github.com/golang/snappy/issues/29 -// -// As a workaround, the package was built with a known good assembler, and -// those instructions were disassembled by "objdump -d" to yield the -// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 -// style comments, in AT&T asm syntax. Note that rsp here is a physical -// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). -// The instructions were then encoded as "BYTE $0x.." sequences, which assemble -// fine on Go 1.6. - -// The asm code generally follows the pure Go code in encode_other.go, except -// where marked with a "!!!". - -// ---------------------------------------------------------------------------- - -// func emitLiteral(dst, lit []byte) int -// -// All local variables fit into registers. The register allocation: -// - AX len(lit) -// - BX n -// - DX return value -// - DI &dst[i] -// - R10 &lit[0] -// -// The 24 bytes of stack space is to call runtime·memmove. -// -// The unusual register allocation of local variables, such as R10 for the -// source pointer, matches the allocation used at the call site in encodeBlock, -// which makes it easier to manually inline this function. -TEXT ·emitLiteral(SB), NOSPLIT, $24-56 - MOVQ dst_base+0(FP), DI - MOVQ lit_base+24(FP), R10 - MOVQ lit_len+32(FP), AX - MOVQ AX, DX - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT oneByte - CMPL BX, $256 - JLT twoBytes - -threeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - ADDQ $3, DX - JMP memmove - -twoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - ADDQ $2, DX - JMP memmove - -oneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - ADDQ $1, DX - -memmove: - MOVQ DX, ret+48(FP) - - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - CALL runtime·memmove(SB) - RET - -// ---------------------------------------------------------------------------- - -// func emitCopy(dst []byte, offset, length int) int -// -// All local variables fit into registers. The register allocation: -// - AX length -// - SI &dst[0] -// - DI &dst[i] -// - R11 offset -// -// The unusual register allocation of local variables, such as R11 for the -// offset, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·emitCopy(SB), NOSPLIT, $0-48 - MOVQ dst_base+0(FP), DI - MOVQ DI, SI - MOVQ offset+24(FP), R11 - MOVQ length+32(FP), AX - -loop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT step1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP loop0 - -step1: - // if length > 64 { etc } - CMPL AX, $64 - JLE step2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -step2: - // if length >= 12 || offset >= 2048 { goto step3 } - CMPL AX, $12 - JGE step3 - CMPL R11, $2048 - JGE step3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -step3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func extendMatch(src []byte, i, j int) int -// -// All local variables fit into registers. The register allocation: -// - DX &src[0] -// - SI &src[j] -// - R13 &src[len(src) - 8] -// - R14 &src[len(src)] -// - R15 &src[i] -// -// The unusual register allocation of local variables, such as R15 for a source -// pointer, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·extendMatch(SB), NOSPLIT, $0-48 - MOVQ src_base+0(FP), DX - MOVQ src_len+8(FP), R14 - MOVQ i+24(FP), R15 - MOVQ j+32(FP), SI - ADDQ DX, R14 - ADDQ DX, R15 - ADDQ DX, SI - MOVQ R14, R13 - SUBQ $8, R13 - -cmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA cmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE bsf - ADDQ $8, R15 - ADDQ $8, SI - JMP cmp8 - -bsf: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -cmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE extendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE extendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP cmp1 - -extendMatchEnd: - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func encodeBlock(dst, src []byte) (d int) -// -// All local variables fit into registers, other than "var table". The register -// allocation: -// - AX . . -// - BX . . -// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). -// - DX 64 &src[0], tableSize -// - SI 72 &src[s] -// - DI 80 &dst[d] -// - R9 88 sLimit -// - R10 . &src[nextEmit] -// - R11 96 prevHash, currHash, nextHash, offset -// - R12 104 &src[base], skip -// - R13 . &src[nextS], &src[len(src) - 8] -// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x -// - R15 112 candidate -// -// The second column (56, 64, etc) is the stack offset to spill the registers -// when calling other functions. We could pack this slightly tighter, but it's -// simpler to have a dedicated spill map independent of the function called. -// -// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An -// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill -// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. -TEXT ·encodeBlock(SB), 0, $32888-56 - MOVQ dst_base+0(FP), DI - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R14 - - // shift, tableSize := uint32(32-8), 1<<8 - MOVQ $24, CX - MOVQ $256, DX - -calcShift: - // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - // shift-- - // } - CMPQ DX, $16384 - JGE varTable - CMPQ DX, R14 - JGE varTable - SUBQ $1, CX - SHLQ $1, DX - JMP calcShift - -varTable: - // var table [maxTableSize]uint16 - // - // In the asm code, unlike the Go code, we can zero-initialize only the - // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU - // writes 16 bytes, so we can do only tableSize/8 writes instead of the - // 2048 writes that would zero-initialize all of table's 32768 bytes. - SHRQ $3, DX - LEAQ table-32768(SP), BX - PXOR X0, X0 - -memclr: - MOVOU X0, 0(BX) - ADDQ $16, BX - SUBQ $1, DX - JNZ memclr - - // !!! DX = &src[0] - MOVQ SI, DX - - // sLimit := len(src) - inputMargin - MOVQ R14, R9 - SUBQ $15, R9 - - // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't - // change for the rest of the function. - MOVQ CX, 56(SP) - MOVQ DX, 64(SP) - MOVQ R9, 88(SP) - - // nextEmit := 0 - MOVQ DX, R10 - - // s := 1 - ADDQ $1, SI - - // nextHash := hash(load32(src, s), shift) - MOVL 0(SI), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - -outer: - // for { etc } - - // skip := 32 - MOVQ $32, R12 - - // nextS := s - MOVQ SI, R13 - - // candidate := 0 - MOVQ $0, R15 - -inner0: - // for { etc } - - // s := nextS - MOVQ R13, SI - - // bytesBetweenHashLookups := skip >> 5 - MOVQ R12, R14 - SHRQ $5, R14 - - // nextS = s + bytesBetweenHashLookups - ADDQ R14, R13 - - // skip += bytesBetweenHashLookups - ADDQ R14, R12 - - // if nextS > sLimit { goto emitRemainder } - MOVQ R13, AX - SUBQ DX, AX - CMPQ AX, R9 - JA emitRemainder - - // candidate = int(table[nextHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[nextHash] = uint16(s) - MOVQ SI, AX - SUBQ DX, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // nextHash = hash(load32(src, nextS), shift) - MOVL 0(R13), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // if load32(src, s) != load32(src, candidate) { continue } break - MOVL 0(SI), AX - MOVL (DX)(R15*1), BX - CMPL AX, BX - JNE inner0 - -fourByteMatch: - // As per the encode_other.go code: - // - // A 4-byte match has been found. We'll later see etc. - - // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment - // on inputMargin in encode.go. - MOVQ SI, AX - SUBQ R10, AX - CMPQ AX, $16 - JLE emitLiteralFastPath - - // ---------------------------------------- - // Begin inline of the emitLiteral call. - // - // d += emitLiteral(dst[d:], src[nextEmit:s]) - - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT inlineEmitLiteralOneByte - CMPL BX, $256 - JLT inlineEmitLiteralTwoBytes - -inlineEmitLiteralThreeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralTwoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralOneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - -inlineEmitLiteralMemmove: - // Spill local variables (registers) onto the stack; call; unspill. - // - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". - MOVQ SI, 72(SP) - MOVQ DI, 80(SP) - MOVQ R15, 112(SP) - CALL runtime·memmove(SB) - MOVQ 56(SP), CX - MOVQ 64(SP), DX - MOVQ 72(SP), SI - MOVQ 80(SP), DI - MOVQ 88(SP), R9 - MOVQ 112(SP), R15 - JMP inner1 - -inlineEmitLiteralEnd: - // End inline of the emitLiteral call. - // ---------------------------------------- - -emitLiteralFastPath: - // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". - MOVB AX, BX - SUBB $1, BX - SHLB $2, BX - MOVB BX, (DI) - ADDQ $1, DI - - // !!! Implement the copy from lit to dst as a 16-byte load and store. - // (Encode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only len(lit) bytes, but that's - // OK. Subsequent iterations will fix up the overrun. - // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - MOVOU 0(R10), X0 - MOVOU X0, 0(DI) - ADDQ AX, DI - -inner1: - // for { etc } - - // base := s - MOVQ SI, R12 - - // !!! offset := base - candidate - MOVQ R12, R11 - SUBQ R15, R11 - SUBQ DX, R11 - - // ---------------------------------------- - // Begin inline of the extendMatch call. - // - // s = extendMatch(src, candidate+4, s+4) - - // !!! R14 = &src[len(src)] - MOVQ src_len+32(FP), R14 - ADDQ DX, R14 - - // !!! R13 = &src[len(src) - 8] - MOVQ R14, R13 - SUBQ $8, R13 - - // !!! R15 = &src[candidate + 4] - ADDQ $4, R15 - ADDQ DX, R15 - - // !!! s += 4 - ADDQ $4, SI - -inlineExtendMatchCmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA inlineExtendMatchCmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE inlineExtendMatchBSF - ADDQ $8, R15 - ADDQ $8, SI - JMP inlineExtendMatchCmp8 - -inlineExtendMatchBSF: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - JMP inlineExtendMatchEnd - -inlineExtendMatchCmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE inlineExtendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE inlineExtendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP inlineExtendMatchCmp1 - -inlineExtendMatchEnd: - // End inline of the extendMatch call. - // ---------------------------------------- - - // ---------------------------------------- - // Begin inline of the emitCopy call. - // - // d += emitCopy(dst[d:], base-candidate, s-base) - - // !!! length := s - base - MOVQ SI, AX - SUBQ R12, AX - -inlineEmitCopyLoop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT inlineEmitCopyStep1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP inlineEmitCopyLoop0 - -inlineEmitCopyStep1: - // if length > 64 { etc } - CMPL AX, $64 - JLE inlineEmitCopyStep2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -inlineEmitCopyStep2: - // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } - CMPL AX, $12 - JGE inlineEmitCopyStep3 - CMPL R11, $2048 - JGE inlineEmitCopyStep3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - JMP inlineEmitCopyEnd - -inlineEmitCopyStep3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - -inlineEmitCopyEnd: - // End inline of the emitCopy call. - // ---------------------------------------- - - // nextEmit = s - MOVQ SI, R10 - - // if s >= sLimit { goto emitRemainder } - MOVQ SI, AX - SUBQ DX, AX - CMPQ AX, R9 - JAE emitRemainder - - // As per the encode_other.go code: - // - // We could immediately etc. - - // x := load64(src, s-1) - MOVQ -1(SI), R14 - - // prevHash := hash(uint32(x>>0), shift) - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // table[prevHash] = uint16(s-1) - MOVQ SI, AX - SUBQ DX, AX - SUBQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // currHash := hash(uint32(x>>8), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // candidate = int(table[currHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[currHash] = uint16(s) - ADDQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // if uint32(x>>8) == load32(src, candidate) { continue } - MOVL (DX)(R15*1), BX - CMPL R14, BX - JEQ inner1 - - // nextHash = hash(uint32(x>>16), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // s++ - ADDQ $1, SI - - // break out of the inner1 for loop, i.e. continue the outer loop. - JMP outer - -emitRemainder: - // if nextEmit < len(src) { etc } - MOVQ src_len+32(FP), AX - ADDQ DX, AX - CMPQ R10, AX - JEQ encodeBlockEnd - - // d += emitLiteral(dst[d:], src[nextEmit:]) - // - // Push args. - MOVQ DI, 0(SP) - MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ R10, 24(SP) - SUBQ R10, AX - MOVQ AX, 32(SP) - MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. - - // Spill local variables (registers) onto the stack; call; unspill. - MOVQ DI, 80(SP) - CALL ·emitLiteral(SB) - MOVQ 80(SP), DI - - // Finish the "d +=" part of "d += emitLiteral(etc)". - ADDQ 48(SP), DI - -encodeBlockEnd: - MOVQ dst_base+0(FP), AX - SUBQ AX, DI - MOVQ DI, d+48(FP) - RET diff --git a/vendor/github.com/klauspost/compress/snappy/runbench.cmd b/vendor/github.com/klauspost/compress/snappy/runbench.cmd deleted file mode 100644 index d24eb4b..0000000 --- a/vendor/github.com/klauspost/compress/snappy/runbench.cmd +++ /dev/null @@ -1,2 +0,0 @@ -del old.txt -go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index ac3640d..beb7fa8 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -5,7 +5,6 @@ It offers a very wide range of compression / speed trade-off, while being backed A high performance compression algorithm is implemented. For now focused on speed. This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. -Note that custom dictionaries are only supported for decompression. This package is pure Go and without use of "unsafe". @@ -17,30 +16,28 @@ Currently the package is heavily optimized for 64 bit processors and will be sig Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`. -Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd - +[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd) ## Compressor ### Status: STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively -used by several projects. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), -kindly supplied by [fuzzit.dev](https://fuzzit.dev/). +used by several projects. This library is being [fuzz-tested](https://github.com/klauspost/compress-fuzz) for all updates. There may still be specific combinations of data types/size/settings that could lead to edge cases, so as always, testing is recommended. For now, a high speed (fastest) and medium-fast (default) compressor has been implemented. -The "Fastest" compression ratio is roughly equivalent to zstd level 1. -The "Default" compression ratio is roughly equivalent to zstd level 3 (default). +* The "Fastest" compression ratio is roughly equivalent to zstd level 1. +* The "Default" compression ratio is roughly equivalent to zstd level 3 (default). +* The "Better" compression ratio is roughly equivalent to zstd level 7. +* The "Best" compression ratio is roughly equivalent to zstd level 11. In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast. -Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2. - ### Usage @@ -55,11 +52,11 @@ To create a writer with default options, do like this: ```Go // Compress input to output. func Compress(in io.Reader, out io.Writer) error { - w, err := NewWriter(output) + enc, err := zstd.NewWriter(out) if err != nil { return err } - _, err := io.Copy(w, input) + _, err = io.Copy(enc, in) if err != nil { enc.Close() return err @@ -81,6 +78,9 @@ of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is in the future. So if you want to limit concurrency for future updates, specify the concurrency you would like. +If you would like stream encoding to be done without spawning async goroutines, use `WithEncoderConcurrency(1)` +which will compress input as each block is completed, blocking on writes until each has completed. + You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined compression settings can be specified. @@ -107,7 +107,8 @@ and seems to ignore concatenated streams, even though [it is part of the spec](h For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`. `EncodeAll` will encode all input in src and append it to dst. -This function can be called concurrently, but each call will only run on a single goroutine. +This function can be called concurrently. +Each call will only run on a same goroutine as the caller. Encoded blocks can be concatenated and the result will be the combined input stream. Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`. @@ -141,7 +142,7 @@ I have collected some speed examples to compare speed and compression against ot * `file` is the input file. * `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library. -* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default". +* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default"; 3 is "better", 4 is "best". * `insize`/`outsize` is the input/output size. * `millis` is the number of milliseconds used for compression. * `mb/s` is megabytes (2^20 bytes) per second. @@ -152,121 +153,108 @@ http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip This package: file out level insize outsize millis mb/s -silesia.tar zskp 1 211947520 73101992 643 313.87 -silesia.tar zskp 2 211947520 67504318 969 208.38 -silesia.tar zskp 3 211947520 65177448 1899 106.44 +silesia.tar zskp 1 211947520 73821326 634 318.47 +silesia.tar zskp 2 211947520 67655404 1508 133.96 +silesia.tar zskp 3 211947520 64746933 3000 67.37 +silesia.tar zskp 4 211947520 60073508 16926 11.94 cgo zstd: silesia.tar zstd 1 211947520 73605392 543 371.56 silesia.tar zstd 3 211947520 66793289 864 233.68 silesia.tar zstd 6 211947520 62916450 1913 105.66 +silesia.tar zstd 9 211947520 60212393 5063 39.92 gzip, stdlib/this package: -silesia.tar gzstd 1 211947520 80007735 1654 122.21 -silesia.tar gzkp 1 211947520 80369488 1168 173.06 +silesia.tar gzstd 1 211947520 80007735 1498 134.87 +silesia.tar gzkp 1 211947520 80088272 1009 200.31 GOB stream of binary data. Highly compressible. https://files.klauspost.com/compress/gob-stream.7z file out level insize outsize millis mb/s -gob-stream zskp 1 1911399616 235022249 3088 590.30 -gob-stream zskp 2 1911399616 205669791 3786 481.34 -gob-stream zskp 3 1911399616 185792019 9324 195.48 +gob-stream zskp 1 1911399616 233948096 3230 564.34 +gob-stream zskp 2 1911399616 203997694 4997 364.73 +gob-stream zskp 3 1911399616 173526523 13435 135.68 +gob-stream zskp 4 1911399616 162195235 47559 38.33 + gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 6 1911399616 193632038 6687 272.56 -gob-stream gzstd 1 1911399616 357382641 10251 177.82 -gob-stream gzkp 1 1911399616 362156523 5695 320.08 +gob-stream zstd 9 1911399616 177620386 16175 112.70 + +gob-stream gzstd 1 1911399616 357382013 9046 201.49 +gob-stream gzkp 1 1911399616 359136669 4885 373.08 The test data for the Large Text Compression Benchmark is the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. http://mattmahoney.net/dc/textdata.html file out level insize outsize millis mb/s -enwik9 zskp 1 1000000000 343848582 3609 264.18 -enwik9 zskp 2 1000000000 317276632 5746 165.97 -enwik9 zskp 3 1000000000 294540704 11725 81.34 +enwik9 zskp 1 1000000000 343833605 3687 258.64 +enwik9 zskp 2 1000000000 317001237 7672 124.29 +enwik9 zskp 3 1000000000 291915823 15923 59.89 +enwik9 zskp 4 1000000000 261710291 77697 12.27 + enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 6 1000000000 295138875 10290 92.68 -enwik9 gzstd 1 1000000000 382578136 9604 99.30 -enwik9 gzkp 1 1000000000 383825945 6544 145.73 +enwik9 zstd 9 1000000000 278348700 28549 33.40 + +enwik9 gzstd 1 1000000000 382578136 8608 110.78 +enwik9 gzkp 1 1000000000 382781160 5628 169.45 Highly compressible JSON file. https://files.klauspost.com/compress/github-june-2days-2019.json.zst file out level insize outsize millis mb/s -github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 -github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 -github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54 +github-june-2days-2019.json zskp 1 6273951764 697439532 9789 611.17 +github-june-2days-2019.json zskp 2 6273951764 610876538 18553 322.49 +github-june-2days-2019.json zskp 3 6273951764 517662858 44186 135.41 +github-june-2days-2019.json zskp 4 6273951764 464617114 165373 36.18 + github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 -github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79 -github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03 +github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16 + +github-june-2days-2019.json gzstd 1 6273951764 1164397768 26793 223.32 +github-june-2days-2019.json gzkp 1 6273951764 1120631856 17693 338.16 VM Image, Linux mint with a few installed applications: https://files.klauspost.com/compress/rawstudio-mint14.7z file out level insize outsize millis mb/s -rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 -rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 -rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75 +rawstudio-mint14.tar zskp 1 8558382592 3718400221 18206 448.29 +rawstudio-mint14.tar zskp 2 8558382592 3326118337 37074 220.15 +rawstudio-mint14.tar zskp 3 8558382592 3163842361 87306 93.49 +rawstudio-mint14.tar zskp 4 8558382592 2970480650 783862 10.41 + rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 -rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40 -rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49 +rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91 + +rawstudio-mint14.tar gzstd 1 8558382592 3926234992 51345 158.96 +rawstudio-mint14.tar gzkp 1 8558382592 3960117298 36722 222.26 CSV data: https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst file out level insize outsize millis mb/s -nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 -nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 -nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53 +nyc-taxi-data-10M.csv zskp 1 3325605752 641319332 9462 335.17 +nyc-taxi-data-10M.csv zskp 2 3325605752 588976126 17570 180.50 +nyc-taxi-data-10M.csv zskp 3 3325605752 529329260 32432 97.79 +nyc-taxi-data-10M.csv zskp 4 3325605752 474949772 138025 22.98 + nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 -nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83 -nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53 -``` +nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12 -### Converters - -As part of the development process a *Snappy* -> *Zstandard* converter was also built. - -This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. -Note that a single block is not framed. - -Conversion is done by converting the stream directly from Snappy without intermediate full decoding. -Therefore the compression ratio is much less than what can be done by a full decompression -and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without -any errors being generated. -No CRC value is being generated and not all CRC values of the Snappy stream are checked. -However, it provides really fast re-compression of Snappy streams. - - -``` -BenchmarkSnappy_ConvertSilesia-8 1 1156001600 ns/op 183.35 MB/s -Snappy len 103008711 -> zstd len 82687318 - -BenchmarkSnappy_Enwik9-8 1 6472998400 ns/op 154.49 MB/s -Snappy len 508028601 -> zstd len 390921079 +nyc-taxi-data-10M.csv gzstd 1 3325605752 928654908 21270 149.11 +nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68 ``` - -```Go - s := zstd.SnappyConverter{} - n, err = s.Convert(input, output) - if err != nil { - fmt.Println("Re-compressed stream to", n, "bytes") - } -``` - -The converter `s` can be reused to avoid allocations, even after errors. - - ## Decompressor Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. @@ -287,20 +275,25 @@ For streaming use a simple setup could look like this: import "github.com/klauspost/compress/zstd" func Decompress(in io.Reader, out io.Writer) error { - d, err := zstd.NewReader(input) + d, err := zstd.NewReader(in) if err != nil { return err } defer d.Close() // Copy content... - _, err := io.Copy(out, d) + _, err = io.Copy(out, d) return err } ``` -It is important to use the "Close" function when you no longer need the Reader to stop running goroutines. -See "Allocation-less operation" below. +It is important to use the "Close" function when you no longer need the Reader to stop running goroutines, +when running with default settings. +Goroutines will exit once an error has been returned, including `io.EOF` at the end of a stream. + +Streams are decoded concurrently in 4 asynchronous stages to give the best possible throughput. +However, if you prefer synchronous decompression, use `WithDecoderConcurrency(1)` which will decompress data +as it is being requested only. For decoding buffers, it could look something like this: @@ -309,7 +302,7 @@ import "github.com/klauspost/compress/zstd" // Create a reader that caches decompressors. // For this operation type we supply a nil Reader. -var decoder, _ = zstd.NewReader(nil) +var decoder, _ = zstd.NewReader(nil, WithDecoderConcurrency(0)) // Decompress a buffer. We don't supply a destination buffer, // so it will be allocated by the decoder. @@ -319,9 +312,12 @@ func Decompress(src []byte) ([]byte, error) { ``` Both of these cases should provide the functionality needed. -The decoder can be used for *concurrent* decompression of multiple buffers. +The decoder can be used for *concurrent* decompression of multiple buffers. +By default 4 decompressors will be created. + It will only allow a certain number of concurrent operations to run. -To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. +To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. +It is possible to use `WithDecoderConcurrency(0)` to create GOMAXPROCS decoders. ### Dictionaries @@ -337,6 +333,21 @@ A re-used Decoder will still contain the dictionaries registered. When registering multiple dictionaries with the same ID, the last one will be used. +It is possible to use dictionaries when compressing data. + +To enable a dictionary use `WithEncoderDict(dict []byte)`. Here only one dictionary will be used +and it will likely be used even if it doesn't improve compression. + +The used dictionary must be used to decompress the content. + +For any real gains, the dictionary should be built with similar data. +If an unsuitable dictionary is used the output may be slightly larger than using no dictionary. +Use the [zstd commandline tool](https://github.com/facebook/zstd/releases) to build a dictionary from sample data. +For information see [zstd dictionary information](https://github.com/facebook/zstd#the-case-for-small-data-compression). + +For now there is a fixed startup performance penalty for compressing content with dictionaries. +This will likely be improved over time. Just be aware to test performance when implementing. + ### Allocation-less operation The decoder has been designed to operate without allocations after a warmup. @@ -358,70 +369,71 @@ In this case no unneeded allocations should be made. The buffer decoder does everything on the same goroutine and does nothing concurrently. It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that. -The stream decoder operates on +The stream decoder will create goroutines that: -* One goroutine reads input and splits the input to several block decoders. -* A number of decoders will decode blocks. -* A goroutine coordinates these blocks and sends history from one to the next. +1) Reads input and splits the input into blocks. +2) Decompression of literals. +3) Decompression of sequences. +4) Reconstruction of output stream. So effectively this also means the decoder will "read ahead" and prepare data to always be available for output. +The concurrency level will, for streams, determine how many blocks ahead the compression will start. + Since "blocks" are quite dependent on the output of the previous block stream decoding will only have limited concurrency. -In practice this means that concurrency is often limited to utilizing about 2 cores effectively. - - +In practice this means that concurrency is often limited to utilizing about 3 cores effectively. + ### Benchmarks -These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd). - The first two are streaming decodes and the last are smaller inputs. - + +Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used. + ``` -BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op -BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op - -BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op -BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op - -Concurrent performance: - -BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op - -BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op +BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op +BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op + +Concurrent blocks, performance: + +BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op ``` -This reflects the performance around May 2020, but this may be out of date. +This reflects the performance around May 2022, but this may be out of date. + +## Zstd inside ZIP files + +It is possible to use zstandard to compress individual files inside zip archives. +While this isn't widely supported it can be useful for internal files. + +To support the compression and decompression of these files you must register a compressor and decompressor. + +It is highly recommended registering the (de)compressors on individual zip Reader/Writer and NOT +use the global registration functions. The main reason for this is that 2 registrations from +different packages will result in a panic. + +It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip +files concurrently, and using a single instance will allow reusing some resources. + +See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for +how to compress and decompress files inside zip archives. # Contributions Contributions are always welcome. For new features/fixes, remember to add tests and for performance enhancements include benchmarks. -For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files. - For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan). This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare. diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index 8544585..d7cd15b 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -7,6 +7,7 @@ package zstd import ( "encoding/binary" "errors" + "fmt" "io" "math/bits" ) @@ -50,16 +51,23 @@ func (b *bitReader) getBits(n uint8) int { if n == 0 /*|| b.bitsRead >= 64 */ { return 0 } - return b.getBitsFast(n) + return int(b.get32BitsFast(n)) } -// getBitsFast requires that at least one bit is requested every time. +// get32BitsFast requires that at least one bit is requested every time. // There are no checks if the buffer is filled. -func (b *bitReader) getBitsFast(n uint8) int { +func (b *bitReader) get32BitsFast(n uint8) uint32 { const regMask = 64 - 1 v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) b.bitsRead += n - return int(v) + return v +} + +func (b *bitReader) get16BitsFast(n uint8) uint16 { + const regMask = 64 - 1 + v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) + b.bitsRead += n + return v } // fillFast() will make sure at least 32 bits are available. @@ -125,6 +133,9 @@ func (b *bitReader) remain() uint { func (b *bitReader) close() error { // Release reference. b.in = nil + if !b.finished() { + return fmt.Errorf("%d extra bits on block, should be 0", b.remain()) + } if b.bitsRead > 64 { return io.ErrUnexpectedEOF } diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go index 303ae90..b366182 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go +++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go @@ -38,7 +38,7 @@ func (b *bitWriter) addBits16NC(value uint16, bits uint8) { b.nBits += bits } -// addBits32NC will add up to 32 bits. +// addBits32NC will add up to 31 bits. // It will not check if there is space for them, // so the caller must ensure that it has flushed recently. func (b *bitWriter) addBits32NC(value uint32, bits uint8) { @@ -46,6 +46,26 @@ func (b *bitWriter) addBits32NC(value uint32, bits uint8) { b.nBits += bits } +// addBits64NC will add up to 64 bits. +// There must be space for 32 bits. +func (b *bitWriter) addBits64NC(value uint64, bits uint8) { + if bits <= 31 { + b.addBits32Clean(uint32(value), bits) + return + } + b.addBits32Clean(uint32(value), 32) + b.flush32() + b.addBits32Clean(uint32(value>>32), bits-32) +} + +// addBits32Clean will add up to 32 bits. +// It will not check if there is space for them. +// The input must not contain more bits than specified. +func (b *bitWriter) addBits32Clean(value uint32, bits uint8) { + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + // addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. // It will not check if there is space for them, so the caller must ensure that it has flushed recently. func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index c8ec6e3..b2bca33 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -5,9 +5,14 @@ package zstd import ( + "bytes" + "encoding/binary" "errors" "fmt" "io" + "io/ioutil" + "os" + "path/filepath" "sync" "github.com/klauspost/compress/huff0" @@ -38,6 +43,9 @@ const ( // maxCompressedBlockSize is the biggest allowed compressed block size (128KB) maxCompressedBlockSize = 128 << 10 + compressedBlockOverAlloc = 16 + maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc + // Maximum possible block size (all Raw+Uncompressed). maxBlockSize = (1 << 21) - 1 @@ -76,17 +84,25 @@ type blockDec struct { // Window size of the block. WindowSize uint64 - history chan *history - input chan struct{} - result chan decodeOutput - sequenceBuf []seq - err error - decWG sync.WaitGroup + err error + + // Check against this crc + checkCRC []byte // Frame to use for singlethreaded decoding. // Should not be used by the decoder itself since parent may be another frame. localFrame *frameDec + sequence []seqVals + + async struct { + newHist *history + literals []byte + seqData []byte + seqSize int // Size of uncompressed sequences + fcs uint64 + } + // Block is RLE, this is the size. RLESize uint32 tmp [4]byte @@ -109,13 +125,8 @@ func (b *blockDec) String() string { func newBlockDec(lowMem bool) *blockDec { b := blockDec{ - lowMem: lowMem, - result: make(chan decodeOutput, 1), - input: make(chan struct{}, 1), - history: make(chan *history, 1), + lowMem: lowMem, } - b.decWG.Add(1) - go b.startDecoder() return &b } @@ -123,44 +134,60 @@ func newBlockDec(lowMem bool) *blockDec { // Input must be a start of a block and will be at the end of the block when returned. func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.WindowSize = windowSize - tmp := br.readSmall(3) - if tmp == nil { - if debug { - println("Reading block header:", io.ErrUnexpectedEOF) - } - return io.ErrUnexpectedEOF + tmp, err := br.readSmall(3) + if err != nil { + println("Reading block header:", err) + return err } bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) b.Last = bh&1 != 0 b.Type = blockType((bh >> 1) & 3) // find size. cSize := int(bh >> 3) - maxSize := maxBlockSize + maxSize := maxCompressedBlockSizeAlloc switch b.Type { case blockTypeReserved: return ErrReservedBlockType case blockTypeRLE: + if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { + if debugDecoder { + printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) + } + return ErrWindowSizeExceeded + } b.RLESize = uint32(cSize) if b.lowMem { maxSize = cSize } cSize = 1 case blockTypeCompressed: - if debug { + if debugDecoder { println("Data size on stream:", cSize) } b.RLESize = 0 - maxSize = maxCompressedBlockSize + maxSize = maxCompressedBlockSizeAlloc if windowSize < maxCompressedBlockSize && b.lowMem { - maxSize = int(windowSize) + maxSize = int(windowSize) + compressedBlockOverAlloc } if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { - if debug { + if debugDecoder { printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b) } return ErrCompressedSizeTooBig } + // Empty compressed blocks must at least be 2 bytes + // for Literals_Block_Type and one for Sequences_Section_Header. + if cSize < 2 { + return ErrBlockTooSmall + } case blockTypeRaw: + if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { + if debugDecoder { + printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) + } + return ErrWindowSizeExceeded + } + b.RLESize = 0 // We do not need a destination for raw blocks. maxSize = -1 @@ -170,19 +197,18 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { // Read block data. if cap(b.dataStorage) < cSize { - if b.lowMem { - b.dataStorage = make([]byte, 0, cSize) + if b.lowMem || cSize > maxCompressedBlockSize { + b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) } else { - b.dataStorage = make([]byte, 0, maxBlockSize) + b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) } } if cap(b.dst) <= maxSize { b.dst = make([]byte, 0, maxSize+1) } - var err error b.data, err = br.readBig(cSize, b.dataStorage) if err != nil { - if debug { + if debugDecoder { println("Reading block:", err, "(", cSize, ")", len(b.data)) printf("%T", br) } @@ -196,85 +222,14 @@ func (b *blockDec) sendErr(err error) { b.Last = true b.Type = blockTypeReserved b.err = err - b.input <- struct{}{} } // Close will release resources. // Closed blockDec cannot be reset. func (b *blockDec) Close() { - close(b.input) - close(b.history) - close(b.result) - b.decWG.Wait() -} - -// decodeAsync will prepare decoding the block when it receives input. -// This will separate output and history. -func (b *blockDec) startDecoder() { - defer b.decWG.Done() - for range b.input { - //println("blockDec: Got block input") - switch b.Type { - case blockTypeRLE: - if cap(b.dst) < int(b.RLESize) { - if b.lowMem { - b.dst = make([]byte, b.RLESize) - } else { - b.dst = make([]byte, maxBlockSize) - } - } - o := decodeOutput{ - d: b, - b: b.dst[:b.RLESize], - err: nil, - } - v := b.data[0] - for i := range o.b { - o.b[i] = v - } - hist := <-b.history - hist.append(o.b) - b.result <- o - case blockTypeRaw: - o := decodeOutput{ - d: b, - b: b.data, - err: nil, - } - hist := <-b.history - hist.append(o.b) - b.result <- o - case blockTypeCompressed: - b.dst = b.dst[:0] - err := b.decodeCompressed(nil) - o := decodeOutput{ - d: b, - b: b.dst, - err: err, - } - if debug { - println("Decompressed to", len(b.dst), "bytes, error:", err) - } - b.result <- o - case blockTypeReserved: - // Used for returning errors. - <-b.history - b.result <- decodeOutput{ - d: b, - b: nil, - err: b.err, - } - default: - panic("Invalid block type") - } - if debug { - println("blockDec: Finished block") - } - } } -// decodeAsync will prepare decoding the block when it receives the history. -// If history is provided, it will not fetch it from the channel. +// decodeBuf func (b *blockDec) decodeBuf(hist *history) error { switch b.Type { case blockTypeRLE: @@ -297,14 +252,23 @@ func (b *blockDec) decodeBuf(hist *history) error { return nil case blockTypeCompressed: saved := b.dst - b.dst = hist.b - hist.b = nil + // Append directly to history + if hist.ignoreBuffer == 0 { + b.dst = hist.b + hist.b = nil + } else { + b.dst = b.dst[:0] + } err := b.decodeCompressed(hist) - if debug { + if debugDecoder { println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err) } - hist.b = b.dst - b.dst = saved + if hist.ignoreBuffer == 0 { + hist.b = b.dst + b.dst = saved + } else { + hist.appendKeep(b.dst) + } return err case blockTypeReserved: // Used for returning errors. @@ -314,30 +278,18 @@ func (b *blockDec) decodeBuf(hist *history) error { } } -// decodeCompressed will start decompressing a block. -// If no history is supplied the decoder will decodeAsync as much as possible -// before fetching from blockDec.history -func (b *blockDec) decodeCompressed(hist *history) error { - in := b.data - delayedHistory := hist == nil - - if delayedHistory { - // We must always grab history. - defer func() { - if hist == nil { - <-b.history - } - }() - } +func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) { // There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header if len(in) < 2 { - return ErrBlockTooSmall + return in, ErrBlockTooSmall } + litType := literalsBlockType(in[0] & 3) var litRegenSize int var litCompSize int sizeFormat := (in[0] >> 2) & 3 var fourStreams bool + var literals []byte switch litType { case literalsBlockRaw, literalsBlockRLE: switch sizeFormat { @@ -353,7 +305,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { // Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes. if len(in) < 3 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12) in = in[3:] @@ -364,7 +316,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { // Both Regenerated_Size and Compressed_Size use 10 bits (0-1023). if len(in) < 3 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) litRegenSize = int(n & 1023) @@ -375,7 +327,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { fourStreams = true if len(in) < 4 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) litRegenSize = int(n & 16383) @@ -385,7 +337,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { fourStreams = true if len(in) < 5 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28) litRegenSize = int(n & 262143) @@ -393,16 +345,18 @@ func (b *blockDec) decodeCompressed(hist *history) error { in = in[5:] } } - if debug { + if debugDecoder { println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams) } - var literals []byte - var huff *huff0.Scratch + if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize { + return in, ErrWindowSizeExceeded + } + switch litType { case literalsBlockRaw: if len(in) < litRegenSize { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } literals = in[:litRegenSize] in = in[litRegenSize:] @@ -410,7 +364,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { case literalsBlockRLE: if len(in) < 1 { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } if cap(b.literalBuf) < litRegenSize { if b.lowMem { @@ -421,7 +375,6 @@ func (b *blockDec) decodeCompressed(hist *history) error { b.literalBuf = make([]byte, litRegenSize) } else { b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize) - } } } @@ -431,45 +384,79 @@ func (b *blockDec) decodeCompressed(hist *history) error { literals[i] = v } in = in[1:] - if debug { + if debugDecoder { printf("Found %d RLE compressed literals\n", litRegenSize) } case literalsBlockTreeless: if len(in) < litCompSize { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } // Store compressed literals, so we defer decoding until we get history. literals = in[:litCompSize] in = in[litCompSize:] - if debug { + if debugDecoder { printf("Found %d compressed literals\n", litCompSize) } + huff := hist.huffTree + if huff == nil { + return in, errors.New("literal block was treeless, but no history was defined") + } + // Ensure we have space to store it. + if cap(b.literalBuf) < litRegenSize { + if b.lowMem { + b.literalBuf = make([]byte, 0, litRegenSize) + } else { + b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) + } + } + var err error + // Use our out buffer. + huff.MaxDecodedSize = maxCompressedBlockSize + if fourStreams { + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) + } else { + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) + } + // Make sure we don't leak our literals buffer + if err != nil { + println("decompressing literals:", err) + return in, err + } + if len(literals) != litRegenSize { + return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) + } + case literalsBlockCompressed: if len(in) < litCompSize { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } literals = in[:litCompSize] in = in[litCompSize:] - huff = huffDecoderPool.Get().(*huff0.Scratch) - var err error // Ensure we have space to store it. if cap(b.literalBuf) < litRegenSize { if b.lowMem { b.literalBuf = make([]byte, 0, litRegenSize) } else { - b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) + b.literalBuf = make([]byte, 0, maxCompressedBlockSize) } } - if huff == nil { - huff = &huff0.Scratch{} + huff := hist.huffTree + if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) { + huff = huffDecoderPool.Get().(*huff0.Scratch) + if huff == nil { + huff = &huff0.Scratch{} + } } + var err error huff, literals, err = huff0.ReadTable(literals, huff) if err != nil { println("reading huffman table:", err) - return err + return in, err } + hist.huffTree = huff + huff.MaxDecodedSize = maxCompressedBlockSize // Use our out buffer. if fourStreams { literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) @@ -478,27 +465,61 @@ func (b *blockDec) decodeCompressed(hist *history) error { } if err != nil { println("decoding compressed literals:", err) - return err + return in, err } // Make sure we don't leak our literals buffer if len(literals) != litRegenSize { - return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) + return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) } - if debug { + if debugDecoder { printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize) } } + hist.decoders.literals = literals + return in, nil +} + +// decodeCompressed will start decompressing a block. +func (b *blockDec) decodeCompressed(hist *history) error { + in := b.data + in, err := b.decodeLiterals(in, hist) + if err != nil { + return err + } + err = b.prepareSequences(in, hist) + if err != nil { + return err + } + if hist.decoders.nSeqs == 0 { + b.dst = append(b.dst, hist.decoders.literals...) + return nil + } + before := len(hist.decoders.out) + err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:]) + if err != nil { + return err + } + if hist.decoders.maxSyncLen > 0 { + hist.decoders.maxSyncLen += uint64(before) + hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out)) + } + b.dst = hist.decoders.out + hist.recentOffsets = hist.decoders.prevOffset + return nil +} +func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { + if debugDecoder { + printf("prepareSequences: %d byte(s) input\n", len(in)) + } // Decode Sequences // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section if len(in) < 1 { return ErrBlockTooSmall } + var nSeqs int seqHeader := in[0] - nSeqs := 0 switch { - case seqHeader == 0: - in = in[1:] case seqHeader < 128: nSeqs = int(seqHeader) in = in[1:] @@ -515,19 +536,16 @@ func (b *blockDec) decodeCompressed(hist *history) error { nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8) in = in[3:] } - // Allocate sequences - if cap(b.sequenceBuf) < nSeqs { - if b.lowMem { - b.sequenceBuf = make([]seq, nSeqs) - } else { - // Allocate max - b.sequenceBuf = make([]seq, nSeqs, maxSequences) + if nSeqs == 0 && len(in) != 0 { + // When no sequences, there should not be any more data... + if debugDecoder { + printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in)) } - } else { - // Reuse buffer - b.sequenceBuf = b.sequenceBuf[:nSeqs] + return ErrUnexpectedBlockSize } - var seqs = &sequenceDecs{} + + var seqs = &hist.decoders + seqs.nSeqs = nSeqs if nSeqs > 0 { if len(in) < 1 { return ErrBlockTooSmall @@ -535,12 +553,12 @@ func (b *blockDec) decodeCompressed(hist *history) error { br := byteReader{b: in, off: 0} compMode := br.Uint8() br.advance(1) - if debug { + if debugDecoder { printf("Compression modes: 0b%b", compMode) } for i := uint(0); i < 3; i++ { mode := seqCompMode((compMode >> (6 - i*2)) & 3) - if debug { + if debugDecoder { println("Table", tableIndex(i), "is", mode) } var seq *sequenceDec @@ -556,6 +574,9 @@ func (b *blockDec) decodeCompressed(hist *history) error { } switch mode { case compModePredefined: + if seq.fse != nil && !seq.fse.preDefined { + fseDecoderPool.Put(seq.fse) + } seq.fse = &fsePredef[i] case compModeRLE: if br.remain() < 1 { @@ -563,34 +584,36 @@ func (b *blockDec) decodeCompressed(hist *history) error { } v := br.Uint8() br.advance(1) - dec := fseDecoderPool.Get().(*fseDecoder) + if seq.fse == nil || seq.fse.preDefined { + seq.fse = fseDecoderPool.Get().(*fseDecoder) + } symb, err := decSymbolValue(v, symbolTableX[i]) if err != nil { printf("RLE Transform table (%v) error: %v", tableIndex(i), err) return err } - dec.setRLE(symb) - seq.fse = dec - if debug { + seq.fse.setRLE(symb) + if debugDecoder { printf("RLE set to %+v, code: %v", symb, v) } case compModeFSE: println("Reading table for", tableIndex(i)) - dec := fseDecoderPool.Get().(*fseDecoder) - err := dec.readNCount(&br, uint16(maxTableSymbol[i])) + if seq.fse == nil || seq.fse.preDefined { + seq.fse = fseDecoderPool.Get().(*fseDecoder) + } + err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i])) if err != nil { println("Read table error:", err) return err } - err = dec.transform(symbolTableX[i]) + err = seq.fse.transform(symbolTableX[i]) if err != nil { println("Transform table error:", err) return err } - if debug { - println("Read table ok", "symbolLen:", dec.symbolLen) + if debugDecoder { + println("Read table ok", "symbolLen:", seq.fse.symbolLen) } - seq.fse = dec case compModeRepeat: seq.repeat = true } @@ -600,140 +623,106 @@ func (b *blockDec) decodeCompressed(hist *history) error { } in = br.unread() } - - // Wait for history. - // All time spent after this is critical since it is strictly sequential. - if hist == nil { - hist = <-b.history - if hist.error { - return ErrDecoderClosed - } + if debugDecoder { + println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.") } - // Decode treeless literal block. - if litType == literalsBlockTreeless { - // TODO: We could send the history early WITHOUT the stream history. - // This would allow decoding treeless literials before the byte history is available. - // Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless. - // So not much obvious gain here. - - if hist.huffTree == nil { - return errors.New("literal block was treeless, but no history was defined") - } - // Ensure we have space to store it. - if cap(b.literalBuf) < litRegenSize { - if b.lowMem { - b.literalBuf = make([]byte, 0, litRegenSize) - } else { - b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) - } - } - var err error - // Use our out buffer. - huff = hist.huffTree - if fourStreams { - literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) - } else { - literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) - } - // Make sure we don't leak our literals buffer - if err != nil { - println("decompressing literals:", err) - return err - } - if len(literals) != litRegenSize { - return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) - } - } else { - if hist.huffTree != nil && huff != nil { - if hist.dict == nil || hist.dict.litDec != hist.huffTree { - huffDecoderPool.Put(hist.huffTree) - } - hist.huffTree = nil + if nSeqs == 0 { + if len(b.sequence) > 0 { + b.sequence = b.sequence[:0] } + return nil } - if huff != nil { - hist.huffTree = huff + br := seqs.br + if br == nil { + br = &bitReader{} } - if debug { - println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.") + if err := br.init(in); err != nil { + return err } - if nSeqs == 0 { - // Decompressed content is defined entirely as Literals Section content. - b.dst = append(b.dst, literals...) - if delayedHistory { - hist.append(literals) + if err := seqs.initialize(br, hist, b.dst); err != nil { + println("initializing sequences:", err) + return err + } + // Extract blocks... + if false && hist.dict == nil { + fatalErr := func(err error) { + if err != nil { + panic(err) + } } - return nil + fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize) + var buf bytes.Buffer + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse)) + buf.Write(in) + ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm) } - seqs, err := seqs.mergeHistory(&hist.decoders) - if err != nil { - return err - } - if debug { - println("History merged ok") + return nil +} + +func (b *blockDec) decodeSequences(hist *history) error { + if cap(b.sequence) < hist.decoders.nSeqs { + if b.lowMem { + b.sequence = make([]seqVals, 0, hist.decoders.nSeqs) + } else { + b.sequence = make([]seqVals, 0, 0x7F00+0xffff) + } } - br := &bitReader{} - if err := br.init(in); err != nil { - return err + b.sequence = b.sequence[:hist.decoders.nSeqs] + if hist.decoders.nSeqs == 0 { + hist.decoders.seqSize = len(hist.decoders.literals) + return nil } + hist.decoders.windowSize = hist.windowSize + hist.decoders.prevOffset = hist.recentOffsets - // TODO: Investigate if sending history without decoders are faster. - // This would allow the sequences to be decoded async and only have to construct stream history. - // If only recent offsets were not transferred, this would be an obvious win. - // Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded. + err := hist.decoders.decode(b.sequence) + hist.recentOffsets = hist.decoders.prevOffset + return err +} +func (b *blockDec) executeSequences(hist *history) error { hbytes := hist.b if len(hbytes) > hist.windowSize { hbytes = hbytes[len(hbytes)-hist.windowSize:] - // We do not need history any more. + // We do not need history anymore. if hist.dict != nil { hist.dict.content = nil } } - - if err := seqs.initialize(br, hist, literals, b.dst); err != nil { - println("initializing sequences:", err) - return err - } - - err = seqs.decode(nSeqs, br, hbytes) + hist.decoders.windowSize = hist.windowSize + hist.decoders.out = b.dst[:0] + err := hist.decoders.execute(b.sequence, hbytes) if err != nil { return err } - if !br.finished() { - return fmt.Errorf("%d extra bits on block, should be 0", br.remain()) - } + return b.updateHistory(hist) +} - err = br.close() - if err != nil { - printf("Closing sequences: %v, %+v\n", err, *br) - } +func (b *blockDec) updateHistory(hist *history) error { if len(b.data) > maxCompressedBlockSize { return fmt.Errorf("compressed block size too large (%d)", len(b.data)) } // Set output and release references. - b.dst = seqs.out - seqs.out, seqs.literals, seqs.hist = nil, nil, nil + b.dst = hist.decoders.out + hist.recentOffsets = hist.decoders.prevOffset - if !delayedHistory { - // If we don't have delayed history, no need to update. - hist.recentOffsets = seqs.prevOffset - return nil - } if b.Last { // if last block we don't care about history. println("Last block, no history returned") hist.b = hist.b[:0] return nil + } else { + hist.append(b.dst) + if debugDecoder { + println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b)) + } } - hist.append(b.dst) - hist.recentOffsets = seqs.prevOffset - if debug { - println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.") - } + hist.decoders.out, hist.decoders.literals = nil, nil return nil } diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index c584f6a..12e8f6f 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -14,35 +14,52 @@ import ( ) type blockEnc struct { - size int - literals []byte - sequences []seq - coders seqCoders - litEnc *huff0.Scratch - wr bitWriter - - extraLits int - last bool - + size int + literals []byte + sequences []seq + coders seqCoders + litEnc *huff0.Scratch + dictLitEnc *huff0.Scratch + wr bitWriter + + extraLits int output []byte recentOffsets [3]uint32 prevRecentOffsets [3]uint32 + + last bool + lowMem bool } // init should be used once the block has been created. // If called more than once, the effect is the same as calling reset. func (b *blockEnc) init() { - if cap(b.literals) < maxCompressedLiteralSize { - b.literals = make([]byte, 0, maxCompressedLiteralSize) - } - const defSeqs = 200 - b.literals = b.literals[:0] - if cap(b.sequences) < defSeqs { - b.sequences = make([]seq, 0, defSeqs) - } - if cap(b.output) < maxCompressedBlockSize { - b.output = make([]byte, 0, maxCompressedBlockSize) + if b.lowMem { + // 1K literals + if cap(b.literals) < 1<<10 { + b.literals = make([]byte, 0, 1<<10) + } + const defSeqs = 20 + if cap(b.sequences) < defSeqs { + b.sequences = make([]seq, 0, defSeqs) + } + // 1K + if cap(b.output) < 1<<10 { + b.output = make([]byte, 0, 1<<10) + } + } else { + if cap(b.literals) < maxCompressedBlockSize { + b.literals = make([]byte, 0, maxCompressedBlockSize) + } + const defSeqs = 2000 + if cap(b.sequences) < defSeqs { + b.sequences = make([]seq, 0, defSeqs) + } + if cap(b.output) < maxCompressedBlockSize { + b.output = make([]byte, 0, maxCompressedBlockSize) + } } + if b.coders.mlEnc == nil { b.coders.mlEnc = &fseEncoder{} b.coders.mlPrev = &fseEncoder{} @@ -75,6 +92,7 @@ func (b *blockEnc) reset(prev *blockEnc) { if prev != nil { b.recentOffsets = prev.prevRecentOffsets } + b.dictLitEnc = nil } // reset will reset the block for a new encode, but in the same stream, @@ -138,7 +156,7 @@ func (h *literalsHeader) setSize(regenLen int) { switch { case inBits < 5: lh |= (uint64(regenLen) << 3) | (1 << 60) - if debug { + if debugEncoder { got := int(lh>>3) & 0xff if got != regenLen { panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)")) @@ -166,7 +184,7 @@ func (h *literalsHeader) setSizes(compLen, inLen int, single bool) { lh |= 1 << 2 } lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60) - if debug { + if debugEncoder { const mmask = (1 << 24) - 1 n := (lh >> 4) & mmask if int(n&1023) != inLen { @@ -294,8 +312,8 @@ func (b *blockEnc) encodeRaw(a []byte) { bh.setType(blockTypeRaw) b.output = bh.appendTo(b.output[:0]) b.output = append(b.output, a...) - if debug { - println("Adding RAW block, length", len(a)) + if debugEncoder { + println("Adding RAW block, length", len(a), "last:", b.last) } } @@ -307,26 +325,26 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { bh.setType(blockTypeRaw) dst = bh.appendTo(dst) dst = append(dst, src...) - if debug { - println("Adding RAW block, length", len(src)) + if debugEncoder { + println("Adding RAW block, length", len(src), "last:", b.last) } return dst } // encodeLits can be used if the block is only litLen. -func (b *blockEnc) encodeLits(raw bool) error { +func (b *blockEnc) encodeLits(lits []byte, raw bool) error { var bh blockHeader bh.setLast(b.last) - bh.setSize(uint32(len(b.literals))) + bh.setSize(uint32(len(lits))) // Don't compress extremely small blocks - if len(b.literals) < 32 || raw { - if debug { - println("Adding RAW block, length", len(b.literals)) + if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw { + if debugEncoder { + println("Adding RAW block, length", len(lits), "last:", b.last) } bh.setType(blockTypeRaw) b.output = bh.appendTo(b.output) - b.output = append(b.output, b.literals...) + b.output = append(b.output, lits...) return nil } @@ -335,37 +353,42 @@ func (b *blockEnc) encodeLits(raw bool) error { reUsed, single bool err error ) - if len(b.literals) >= 1024 { + if b.dictLitEnc != nil { + b.litEnc.TransferCTable(b.dictLitEnc) + b.litEnc.Reuse = huff0.ReusePolicyAllow + b.dictLitEnc = nil + } + if len(lits) >= 1024 { // Use 4 Streams. - out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) - } else if len(b.literals) > 32 { + out, reUsed, err = huff0.Compress4X(lits, b.litEnc) + } else if len(lits) > 32 { // Use 1 stream single = true - out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) + out, reUsed, err = huff0.Compress1X(lits, b.litEnc) } else { err = huff0.ErrIncompressible } switch err { case huff0.ErrIncompressible: - if debug { - println("Adding RAW block, length", len(b.literals)) + if debugEncoder { + println("Adding RAW block, length", len(lits), "last:", b.last) } bh.setType(blockTypeRaw) b.output = bh.appendTo(b.output) - b.output = append(b.output, b.literals...) + b.output = append(b.output, lits...) return nil case huff0.ErrUseRLE: - if debug { - println("Adding RLE block, length", len(b.literals)) + if debugEncoder { + println("Adding RLE block, length", len(lits)) } bh.setType(blockTypeRLE) b.output = bh.appendTo(b.output) - b.output = append(b.output, b.literals[0]) + b.output = append(b.output, lits[0]) return nil + case nil: default: return err - case nil: } // Compressed... // Now, allow reuse @@ -373,18 +396,18 @@ func (b *blockEnc) encodeLits(raw bool) error { bh.setType(blockTypeCompressed) var lh literalsHeader if reUsed { - if debug { + if debugEncoder { println("Reused tree, compressed to", len(out)) } lh.setType(literalsBlockTreeless) } else { - if debug { + if debugEncoder { println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable)) } lh.setType(literalsBlockCompressed) } // Set sizes - lh.setSizes(len(out), len(b.literals), single) + lh.setSizes(len(out), len(lits), single) bh.setSize(uint32(len(out) + lh.size() + 1)) // Write block headers. @@ -403,7 +426,7 @@ func fuzzFseEncoder(data []byte) int { return 0 } enc := fseEncoder{} - hist := enc.Histogram()[:256] + hist := enc.Histogram() maxSym := uint8(0) for i, v := range data { v = v & 63 @@ -444,13 +467,19 @@ func fuzzFseEncoder(data []byte) int { } // encode will encode the block and append the output in b.output. -func (b *blockEnc) encode(raw, rawAllLits bool) error { +// Previous offset codes must be pushed if more blocks are expected. +func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if len(b.sequences) == 0 { - return b.encodeLits(rawAllLits) + return b.encodeLits(b.literals, rawAllLits) } - // We want some difference - if len(b.literals) > (b.size - (b.size >> 5)) { - return errIncompressible + // We want some difference to at least account for the headers. + saved := b.size - len(b.literals) - (b.size >> 5) + if saved < 16 { + if org == nil { + return errIncompressible + } + b.popOffsets() + return b.encodeLits(org, rawAllLits) } var bh blockHeader @@ -466,6 +495,11 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { reUsed, single bool err error ) + if b.dictLitEnc != nil { + b.litEnc.TransferCTable(b.dictLitEnc) + b.litEnc.Reuse = huff0.ReusePolicyAllow + b.dictLitEnc = nil + } if len(b.literals) >= 1024 && !raw { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) @@ -483,7 +517,7 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { lh.setSize(len(b.literals)) b.output = lh.appendTo(b.output) b.output = append(b.output, b.literals...) - if debug { + if debugEncoder { println("Adding literals RAW, length", len(b.literals)) } case huff0.ErrUseRLE: @@ -491,27 +525,22 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { lh.setSize(len(b.literals)) b.output = lh.appendTo(b.output) b.output = append(b.output, b.literals[0]) - if debug { + if debugEncoder { println("Adding literals RLE") } - default: - if debug { - println("Adding literals ERROR:", err) - } - return err case nil: // Compressed litLen... if reUsed { - if debug { + if debugEncoder { println("reused tree") } lh.setType(literalsBlockTreeless) } else { - if debug { + if debugEncoder { println("new tree, size:", len(b.litEnc.OutTable)) } lh.setType(literalsBlockCompressed) - if debug { + if debugEncoder { _, _, err := huff0.ReadTable(out, nil) if err != nil { panic(err) @@ -519,16 +548,21 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { } } lh.setSizes(len(out), len(b.literals), single) - if debug { + if debugEncoder { printf("Compressed %d literals to %d bytes", len(b.literals), len(out)) println("Adding literal header:", lh) } b.output = lh.appendTo(b.output) b.output = append(b.output, out...) b.litEnc.Reuse = huff0.ReusePolicyAllow - if debug { + if debugEncoder { println("Adding literals compressed") } + default: + if debugEncoder { + println("Adding literals ERROR:", err) + } + return err } // Sequence compression @@ -543,7 +577,7 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { n := len(b.sequences) - 0x7f00 b.output = append(b.output, 255, uint8(n), uint8(n>>8)) } - if debug { + if debugEncoder { println("Encoding", len(b.sequences), "sequences") } b.genCodes() @@ -577,17 +611,17 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { nSize = nSize + (nSize+2*8*16)>>4 switch { case predefSize <= prevSize && predefSize <= nSize || forcePreDef: - if debug { + if debugEncoder { println("Using predefined", predefSize>>3, "<=", nSize>>3) } return preDef, compModePredefined case prevSize <= nSize: - if debug { + if debugEncoder { println("Using previous", prevSize>>3, "<=", nSize>>3) } return prev, compModeRepeat default: - if debug { + if debugEncoder { println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes") println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen]) } @@ -600,7 +634,7 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { if llEnc.useRLE { mode |= uint8(compModeRLE) << 6 llEnc.setRLE(b.sequences[0].llCode) - if debug { + if debugEncoder { println("llEnc.useRLE") } } else { @@ -611,7 +645,7 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { if ofEnc.useRLE { mode |= uint8(compModeRLE) << 4 ofEnc.setRLE(b.sequences[0].ofCode) - if debug { + if debugEncoder { println("ofEnc.useRLE") } } else { @@ -623,7 +657,7 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { if mlEnc.useRLE { mode |= uint8(compModeRLE) << 2 mlEnc.setRLE(b.sequences[0].mlCode) - if debug { + if debugEncoder { println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen) } } else { @@ -632,7 +666,7 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { mode |= uint8(m) << 2 } b.output = append(b.output, mode) - if debug { + if debugEncoder { printf("Compression modes: 0b%b", mode) } b.output, err = llEnc.writeCount(b.output) @@ -688,52 +722,53 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB) } seq-- - if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 { - // No need to flush (common) - for seq >= 0 { - s = b.sequences[seq] - wr.flush32() - llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] - // tabelog max is 8 for all. - of.encode(ofB) - ml.encode(mlB) - ll.encode(llB) - wr.flush32() - - // We checked that all can stay within 32 bits - wr.addBits32NC(s.litLen, llB.outBits) - wr.addBits32NC(s.matchLen, mlB.outBits) - wr.addBits32NC(s.offset, ofB.outBits) - - if debugSequences { - println("Encoded seq", seq, s) - } - - seq-- + // Store sequences in reverse... + for seq >= 0 { + s = b.sequences[seq] + + ofB := ofTT[s.ofCode] + wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits. + //of.encode(ofB) + nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16 + dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState) + wr.addBits16NC(of.state, uint8(nbBitsOut)) + of.state = of.stateTable[dstState] + + // Accumulate extra bits. + outBits := ofB.outBits & 31 + extraBits := uint64(s.offset & bitMask32[outBits]) + extraBitsN := outBits + + mlB := mlTT[s.mlCode] + //ml.encode(mlB) + nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16 + dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState) + wr.addBits16NC(ml.state, uint8(nbBitsOut)) + ml.state = ml.stateTable[dstState] + + outBits = mlB.outBits & 31 + extraBits = extraBits<> 16 + dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState) + wr.addBits16NC(ll.state, uint8(nbBitsOut)) + ll.state = ll.stateTable[dstState] + + outBits = llB.outBits & 31 + extraBits = extraBits<= 0 { - s = b.sequences[seq] - wr.flush32() - llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] - // tabelog max is below 8 for each. - of.encode(ofB) - ml.encode(mlB) - ll.encode(llB) - wr.flush32() - - // ml+ll = max 32 bits total - wr.addBits32NC(s.litLen, llB.outBits) - wr.addBits32NC(s.matchLen, mlB.outBits) - wr.flush32() - wr.addBits32NC(s.offset, ofB.outBits) - - if debugSequences { - println("Encoded seq", seq, s) - } - seq-- - } + seq-- } ml.flush(mlEnc.actualTableLog) of.flush(ofEnc.actualTableLog) @@ -752,7 +787,7 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error { // Size is output minus block header. bh.setSize(uint32(len(b.output)-bhOffset) - 3) - if debug { + if debugEncoder { println("Rewriting block header", bh) } _ = bh.appendTo(b.output[bhOffset:bhOffset]) @@ -767,14 +802,13 @@ func (b *blockEnc) genCodes() { // nothing to do return } - if len(b.sequences) > math.MaxUint16 { panic("can only encode up to 64K sequences") } // No bounds checks after here: - llH := b.coders.llEnc.Histogram()[:256] - ofH := b.coders.ofEnc.Histogram()[:256] - mlH := b.coders.mlEnc.Histogram()[:256] + llH := b.coders.llEnc.Histogram() + ofH := b.coders.ofEnc.Histogram() + mlH := b.coders.mlEnc.Histogram() for i := range llH { llH[i] = 0 } @@ -786,7 +820,8 @@ func (b *blockEnc) genCodes() { } var llMax, ofMax, mlMax uint8 - for i, seq := range b.sequences { + for i := range b.sequences { + seq := &b.sequences[i] v := llCode(seq.litLen) seq.llCode = v llH[v]++ @@ -810,7 +845,6 @@ func (b *blockEnc) genCodes() { panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen)) } } - b.sequences[i] = seq } maxCount := func(a []uint32) int { var max uint32 diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go index 658ef78..b80191e 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -12,8 +12,8 @@ import ( type byteBuffer interface { // Read up to 8 bytes. - // Returns nil if no more input is available. - readSmall(n int) []byte + // Returns io.ErrUnexpectedEOF if this cannot be satisfied. + readSmall(n int) ([]byte, error) // Read >8 bytes. // MAY use the destination slice. @@ -29,17 +29,17 @@ type byteBuffer interface { // in-memory buffer type byteBuf []byte -func (b *byteBuf) readSmall(n int) []byte { +func (b *byteBuf) readSmall(n int) ([]byte, error) { if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } bb := *b if len(bb) < n { - return nil + return nil, io.ErrUnexpectedEOF } r := bb[:n] *b = bb[n:] - return r + return r, nil } func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { @@ -81,19 +81,22 @@ type readerWrapper struct { tmp [8]byte } -func (r *readerWrapper) readSmall(n int) []byte { +func (r *readerWrapper) readSmall(n int) ([]byte, error) { if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } n2, err := io.ReadFull(r.r, r.tmp[:n]) // We only really care about the actual bytes read. - if n2 != n { - if debug { + if err != nil { + if err == io.EOF { + return nil, io.ErrUnexpectedEOF + } + if debugDecoder { println("readSmall: got", n2, "want", n, "err", err) } - return nil + return nil, err } - return r.tmp[:n] + return r.tmp[:n], nil } func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) { @@ -110,6 +113,9 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) { func (r *readerWrapper) readByte() (byte, error) { n2, err := r.r.Read(r.tmp[:1]) if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } return 0, err } if n2 != 1 { diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go new file mode 100644 index 0000000..5022e71 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -0,0 +1,230 @@ +// Copyright 2020+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +import ( + "bytes" + "encoding/binary" + "errors" + "io" +) + +// HeaderMaxSize is the maximum size of a Frame and Block Header. +// If less is sent to Header.Decode it *may* still contain enough information. +const HeaderMaxSize = 14 + 3 + +// Header contains information about the first frame and block within that. +type Header struct { + // SingleSegment specifies whether the data is to be decompressed into a + // single contiguous memory segment. + // It implies that WindowSize is invalid and that FrameContentSize is valid. + SingleSegment bool + + // WindowSize is the window of data to keep while decoding. + // Will only be set if SingleSegment is false. + WindowSize uint64 + + // Dictionary ID. + // If 0, no dictionary. + DictionaryID uint32 + + // HasFCS specifies whether FrameContentSize has a valid value. + HasFCS bool + + // FrameContentSize is the expected uncompressed size of the entire frame. + FrameContentSize uint64 + + // Skippable will be true if the frame is meant to be skipped. + // This implies that FirstBlock.OK is false. + Skippable bool + + // SkippableID is the user-specific ID for the skippable frame. + // Valid values are between 0 to 15, inclusive. + SkippableID int + + // SkippableSize is the length of the user data to skip following + // the header. + SkippableSize uint32 + + // HeaderSize is the raw size of the frame header. + // + // For normal frames, it includes the size of the magic number and + // the size of the header (per section 3.1.1.1). + // It does not include the size for any data blocks (section 3.1.1.2) nor + // the size for the trailing content checksum. + // + // For skippable frames, this counts the size of the magic number + // along with the size of the size field of the payload. + // It does not include the size of the skippable payload itself. + // The total frame size is the HeaderSize plus the SkippableSize. + HeaderSize int + + // First block information. + FirstBlock struct { + // OK will be set if first block could be decoded. + OK bool + + // Is this the last block of a frame? + Last bool + + // Is the data compressed? + // If true CompressedSize will be populated. + // Unfortunately DecompressedSize cannot be determined + // without decoding the blocks. + Compressed bool + + // DecompressedSize is the expected decompressed size of the block. + // Will be 0 if it cannot be determined. + DecompressedSize int + + // CompressedSize of the data in the block. + // Does not include the block header. + // Will be equal to DecompressedSize if not Compressed. + CompressedSize int + } + + // If set there is a checksum present for the block content. + // The checksum field at the end is always 4 bytes long. + HasCheckSum bool +} + +// Decode the header from the beginning of the stream. +// This will decode the frame header and the first block header if enough bytes are provided. +// It is recommended to provide at least HeaderMaxSize bytes. +// If the frame header cannot be read an error will be returned. +// If there isn't enough input, io.ErrUnexpectedEOF is returned. +// The FirstBlock.OK will indicate if enough information was available to decode the first block header. +func (h *Header) Decode(in []byte) error { + *h = Header{} + if len(in) < 4 { + return io.ErrUnexpectedEOF + } + h.HeaderSize += 4 + b, in := in[:4], in[4:] + if !bytes.Equal(b, frameMagic) { + if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { + return ErrMagicMismatch + } + if len(in) < 4 { + return io.ErrUnexpectedEOF + } + h.HeaderSize += 4 + h.Skippable = true + h.SkippableID = int(b[0] & 0xf) + h.SkippableSize = binary.LittleEndian.Uint32(in) + return nil + } + + // Read Window_Descriptor + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor + if len(in) < 1 { + return io.ErrUnexpectedEOF + } + fhd, in := in[0], in[1:] + h.HeaderSize++ + h.SingleSegment = fhd&(1<<5) != 0 + h.HasCheckSum = fhd&(1<<2) != 0 + if fhd&(1<<3) != 0 { + return errors.New("reserved bit set on frame header") + } + + if !h.SingleSegment { + if len(in) < 1 { + return io.ErrUnexpectedEOF + } + var wd byte + wd, in = in[0], in[1:] + h.HeaderSize++ + windowLog := 10 + (wd >> 3) + windowBase := uint64(1) << windowLog + windowAdd := (windowBase / 8) * uint64(wd&0x7) + h.WindowSize = windowBase + windowAdd + } + + // Read Dictionary_ID + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id + if size := fhd & 3; size != 0 { + if size == 3 { + size = 4 + } + if len(in) < int(size) { + return io.ErrUnexpectedEOF + } + b, in = in[:size], in[size:] + h.HeaderSize += int(size) + switch size { + case 1: + h.DictionaryID = uint32(b[0]) + case 2: + h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) + case 4: + h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + } + } + + // Read Frame_Content_Size + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size + var fcsSize int + v := fhd >> 6 + switch v { + case 0: + if h.SingleSegment { + fcsSize = 1 + } + default: + fcsSize = 1 << v + } + + if fcsSize > 0 { + h.HasFCS = true + if len(in) < fcsSize { + return io.ErrUnexpectedEOF + } + b, in = in[:fcsSize], in[fcsSize:] + h.HeaderSize += int(fcsSize) + switch fcsSize { + case 1: + h.FrameContentSize = uint64(b[0]) + case 2: + // When FCS_Field_Size is 2, the offset of 256 is added. + h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256 + case 4: + h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24) + case 8: + d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24) + h.FrameContentSize = uint64(d1) | (uint64(d2) << 32) + } + } + + // Frame Header done, we will not fail from now on. + if len(in) < 3 { + return nil + } + tmp := in[:3] + bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) + h.FirstBlock.Last = bh&1 != 0 + blockType := blockType((bh >> 1) & 3) + // find size. + cSize := int(bh >> 3) + switch blockType { + case blockTypeReserved: + return nil + case blockTypeRLE: + h.FirstBlock.Compressed = true + h.FirstBlock.DecompressedSize = cSize + h.FirstBlock.CompressedSize = 1 + case blockTypeCompressed: + h.FirstBlock.Compressed = true + h.FirstBlock.CompressedSize = cSize + case blockTypeRaw: + h.FirstBlock.DecompressedSize = cSize + h.FirstBlock.CompressedSize = cSize + default: + panic("Invalid block type") + } + + h.FirstBlock.OK = true + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 66b51bf..36119f3 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -6,9 +6,12 @@ package zstd import ( "bytes" - "errors" + "context" + "encoding/binary" "io" "sync" + + "github.com/klauspost/compress/zstd/internal/xxhash" ) // Decoder provides decoding of zstandard streams. @@ -23,12 +26,19 @@ type Decoder struct { // Unreferenced decoders, ready for use. decoders chan *blockDec - // Streams ready to be decoded. - stream chan decodeStream - // Current read position used for Reader functionality. current decoderState + // sync stream decoding + syncStream struct { + decodedFrame uint64 + br readerWrapper + enabled bool + inFrame bool + } + + frame *frameDec + // Custom dictionaries. // Always uses copies. dicts map[uint32]dict @@ -47,7 +57,10 @@ type decoderState struct { output chan decodeOutput // cancel remaining output. - cancel chan struct{} + cancel context.CancelFunc + + // crc of current frame + crc *xxhash.Digest flushed bool } @@ -82,9 +95,13 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { return nil, err } } - d.current.output = make(chan decodeOutput, d.o.concurrent) + d.current.crc = xxhash.New() d.current.flushed = true + if r == nil { + d.current.err = ErrDecoderNilInput + } + // Transfer option dicts. d.dicts = make(map[uint32]dict, len(d.o.dicts)) for _, dc := range d.o.dicts { @@ -110,9 +127,6 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { // Returns the number of bytes written and any error that occurred. // When the stream is done, io.EOF will be returned. func (d *Decoder) Read(p []byte) (int, error) { - if d.stream == nil { - return 0, errors.New("no input has been initialized") - } var n int for { if len(d.current.b) > 0 { @@ -130,12 +144,12 @@ func (d *Decoder) Read(p []byte) (int, error) { break } if !d.nextBlock(n == 0) { - return n, nil + return n, d.current.err } } } if len(d.current.b) > 0 { - if debug { + if debugDecoder { println("returning", n, "still bytes left:", len(d.current.b)) } // Only return error at end of block @@ -144,7 +158,7 @@ func (d *Decoder) Read(p []byte) (int, error) { if d.current.err != nil { d.drainOutput() } - if debug { + if debugDecoder { println("returning", n, d.current.err, len(d.decoders)) } return n, d.current.err @@ -152,28 +166,33 @@ func (d *Decoder) Read(p []byte) (int, error) { // Reset will reset the decoder the supplied stream after the current has finished processing. // Note that this functionality cannot be used after Close has been called. +// Reset can be called with a nil reader to release references to the previous reader. +// After being called with a nil reader, no other operations than Reset or DecodeAll or Close +// should be used. func (d *Decoder) Reset(r io.Reader) error { if d.current.err == ErrDecoderClosed { return d.current.err } - if r == nil { - return errors.New("nil Reader sent as input") - } - - if d.stream == nil { - d.stream = make(chan decodeStream, 1) - d.streamWg.Add(1) - go d.startStreamDecoder(d.stream) - } d.drainOutput() - // If bytes buffer and < 1MB, do sync decoding anyway. - if bb, ok := r.(*bytes.Buffer); ok && bb.Len() < 1<<20 { - if debug { + d.syncStream.br.r = nil + if r == nil { + d.current.err = ErrDecoderNilInput + if len(d.current.b) > 0 { + d.current.b = d.current.b[:0] + } + d.current.flushed = true + return nil + } + + // If bytes buffer and < 5MB, do sync decoding anyway. + if bb, ok := r.(byter); ok && bb.Len() < 5<<20 { + bb2 := bb + if debugDecoder { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } - b := bb.Bytes() + b := bb2.Bytes() var dst []byte if cap(d.current.b) > 0 { dst = d.current.b @@ -186,36 +205,48 @@ func (d *Decoder) Reset(r io.Reader) error { d.current.b = dst d.current.err = err d.current.flushed = true - if debug { - println("sync decode to ", len(dst), "bytes, err:", err) + if debugDecoder { + println("sync decode to", len(dst), "bytes, err:", err) } return nil } - // Remove current block. + d.stashDecoder() d.current.decodeOutput = decodeOutput{} d.current.err = nil - d.current.cancel = make(chan struct{}) d.current.flushed = false d.current.d = nil - d.stream <- decodeStream{ - r: r, - output: d.current.output, - cancel: d.current.cancel, + // Ensure no-one else is still running... + d.streamWg.Wait() + if d.frame == nil { + d.frame = newFrameDec(d.o) + } + + if d.o.concurrent == 1 { + return d.startSyncDecoder(r) } + + d.current.output = make(chan decodeOutput, d.o.concurrent) + ctx, cancel := context.WithCancel(context.Background()) + d.current.cancel = cancel + d.streamWg.Add(1) + go d.startStreamDecoder(ctx, r, d.current.output) + return nil } // drainOutput will drain the output until errEndOfStream is sent. func (d *Decoder) drainOutput() { if d.current.cancel != nil { - println("cancelling current") - close(d.current.cancel) + if debugDecoder { + println("cancelling current") + } + d.current.cancel() d.current.cancel = nil } if d.current.d != nil { - if debug { + if debugDecoder { printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders)) } d.decoders <- d.current.d @@ -226,39 +257,31 @@ func (d *Decoder) drainOutput() { println("current already flushed") return } - for { - select { - case v := <-d.current.output: - if v.d != nil { - if debug { - printf("re-adding decoder %p", v.d) - } - d.decoders <- v.d - } - if v.err == errEndOfStream { - println("current flushed") - d.current.flushed = true - return + for v := range d.current.output { + if v.d != nil { + if debugDecoder { + printf("re-adding decoder %p", v.d) } + d.decoders <- v.d } } + d.current.output = nil + d.current.flushed = true } // WriteTo writes data to w until there's no more data to write or when an error occurs. // The return value n is the number of bytes written. // Any error encountered during the write is also returned. func (d *Decoder) WriteTo(w io.Writer) (int64, error) { - if d.stream == nil { - return 0, errors.New("no input has been initialized") - } var n int64 for { if len(d.current.b) > 0 { n2, err2 := w.Write(d.current.b) n += int64(n2) - if err2 != nil && d.current.err == nil { + if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) { d.current.err = err2 - break + } else if n2 != len(d.current.b) { + d.current.err = io.ErrShortWrite } } if d.current.err != nil { @@ -282,7 +305,7 @@ func (d *Decoder) WriteTo(w io.Writer) (int64, error) { // DecodeAll can be used concurrently. // The Decoder concurrency limits will be respected. func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { - if d.current.err == ErrDecoderClosed { + if d.decoders == nil { return dst, ErrDecoderClosed } @@ -290,11 +313,14 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { block := <-d.decoders frame := block.localFrame defer func() { - if debug { + if debugDecoder { printf("re-adding decoder: %p", block) } frame.rawInput = nil frame.bBuf = nil + if frame.history.decoders.br != nil { + frame.history.decoders.br.in = nil + } d.decoders <- block }() frame.bBuf = input @@ -302,37 +328,50 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { for { frame.history.reset() err := frame.reset(&frame.bBuf) - if err == io.EOF { - return dst, nil + if err != nil { + if err == io.EOF { + if debugDecoder { + println("frame reset return EOF") + } + return dst, nil + } + return dst, err } if frame.DictionaryID != nil { dict, ok := d.dicts[*frame.DictionaryID] if !ok { return nil, ErrUnknownDictionary } + if debugDecoder { + println("setting dict", frame.DictionaryID) + } frame.history.setDict(&dict) } - if err != nil { - return dst, err - } - if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { - return dst, ErrDecoderSizeExceeded + if frame.WindowSize > d.o.maxWindowSize { + return dst, ErrWindowSizeExceeded } - if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 { - // Never preallocate moe than 1 GB up front. - if uint64(cap(dst)) < frame.FrameContentSize { - dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)) + if frame.FrameContentSize != fcsUnknown { + if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { + return dst, ErrDecoderSizeExceeded + } + if cap(dst)-len(dst) < int(frame.FrameContentSize) { + dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc) copy(dst2, dst) dst = dst2 } } + if cap(dst) == 0 { - // Allocate window size * 2 by default if nothing is provided and we didn't get frame content size. - size := frame.WindowSize * 2 + // Allocate len(input) * 2 by default if nothing is provided + // and we didn't get frame content size. + size := len(input) * 2 // Cap to 1 MB. if size > 1<<20 { size = 1 << 20 } + if uint64(size) > d.o.maxDecodedSize { + size = int(d.o.maxDecodedSize) + } dst = make([]byte, 0, size) } @@ -341,6 +380,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { return dst, err } if len(frame.bBuf) == 0 { + if debugDecoder { + println("frame dbuf empty") + } break } } @@ -353,33 +395,176 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { // If non-blocking mode is used the returned boolean will be false // if no data was available without blocking. func (d *Decoder) nextBlock(blocking bool) (ok bool) { - if d.current.d != nil { - if debug { - printf("re-adding current decoder %p", d.current.d) - } - d.decoders <- d.current.d - d.current.d = nil - } if d.current.err != nil { // Keep error state. - return blocking + return false + } + d.current.b = d.current.b[:0] + + // SYNC: + if d.syncStream.enabled { + if !blocking { + return false + } + ok = d.nextBlockSync() + if !ok { + d.stashDecoder() + } + return ok } + //ASYNC: + d.stashDecoder() if blocking { - d.current.decodeOutput = <-d.current.output + d.current.decodeOutput, ok = <-d.current.output } else { select { - case d.current.decodeOutput = <-d.current.output: + case d.current.decodeOutput, ok = <-d.current.output: default: return false } } - if debug { - println("got", len(d.current.b), "bytes, error:", d.current.err) + if !ok { + // This should not happen, so signal error state... + d.current.err = io.ErrUnexpectedEOF + return false + } + next := d.current.decodeOutput + if next.d != nil && next.d.async.newHist != nil { + d.current.crc.Reset() + } + if debugDecoder { + var tmp [4]byte + binary.LittleEndian.PutUint32(tmp[:], uint32(xxhash.Sum64(next.b))) + println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp) + } + + if !d.o.ignoreChecksum && len(next.b) > 0 { + n, err := d.current.crc.Write(next.b) + if err == nil { + if n != len(next.b) { + d.current.err = io.ErrShortWrite + } + } + } + if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 { + got := d.current.crc.Sum64() + var tmp [4]byte + binary.LittleEndian.PutUint32(tmp[:], uint32(got)) + if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) { + if debugDecoder { + println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)") + } + d.current.err = ErrCRCMismatch + } else { + if debugDecoder { + println("CRC ok", tmp[:]) + } + } + } + + return true +} + +func (d *Decoder) nextBlockSync() (ok bool) { + if d.current.d == nil { + d.current.d = <-d.decoders + } + for len(d.current.b) == 0 { + if !d.syncStream.inFrame { + d.frame.history.reset() + d.current.err = d.frame.reset(&d.syncStream.br) + if d.current.err != nil { + return false + } + if d.frame.DictionaryID != nil { + dict, ok := d.dicts[*d.frame.DictionaryID] + if !ok { + d.current.err = ErrUnknownDictionary + return false + } else { + d.frame.history.setDict(&dict) + } + } + if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize { + d.current.err = ErrDecoderSizeExceeded + return false + } + + d.syncStream.decodedFrame = 0 + d.syncStream.inFrame = true + } + d.current.err = d.frame.next(d.current.d) + if d.current.err != nil { + return false + } + d.frame.history.ensureBlock() + if debugDecoder { + println("History trimmed:", len(d.frame.history.b), "decoded already:", d.syncStream.decodedFrame) + } + histBefore := len(d.frame.history.b) + d.current.err = d.current.d.decodeBuf(&d.frame.history) + + if d.current.err != nil { + println("error after:", d.current.err) + return false + } + d.current.b = d.frame.history.b[histBefore:] + if debugDecoder { + println("history after:", len(d.frame.history.b)) + } + + // Check frame size (before CRC) + d.syncStream.decodedFrame += uint64(len(d.current.b)) + if d.syncStream.decodedFrame > d.frame.FrameContentSize { + if debugDecoder { + printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize) + } + d.current.err = ErrFrameSizeExceeded + return false + } + + // Check FCS + if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize { + if debugDecoder { + printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize) + } + d.current.err = ErrFrameSizeMismatch + return false + } + + // Update/Check CRC + if d.frame.HasCheckSum { + if !d.o.ignoreChecksum { + d.frame.crc.Write(d.current.b) + } + if d.current.d.Last { + if !d.o.ignoreChecksum { + d.current.err = d.frame.checkCRC() + } else { + d.current.err = d.frame.consumeCRC() + } + if d.current.err != nil { + println("CRC error:", d.current.err) + return false + } + } + } + d.syncStream.inFrame = !d.current.d.Last } return true } +func (d *Decoder) stashDecoder() { + if d.current.d != nil { + if debugDecoder { + printf("re-adding current decoder %p", d.current.d) + } + d.decoders <- d.current.d + d.current.d = nil + } +} + // Close will release all resources. // It is NOT possible to reuse the decoder after this. func (d *Decoder) Close() { @@ -387,10 +572,10 @@ func (d *Decoder) Close() { return } d.drainOutput() - if d.stream != nil { - close(d.stream) + if d.current.cancel != nil { + d.current.cancel() d.streamWg.Wait() - d.stream = nil + d.current.cancel = nil } if d.decoders != nil { close(d.decoders) @@ -441,100 +626,307 @@ type decodeOutput struct { err error } -type decodeStream struct { - r io.Reader - - // Blocks ready to be written to output. - output chan decodeOutput - - // cancel reading from the input - cancel chan struct{} +func (d *Decoder) startSyncDecoder(r io.Reader) error { + d.frame.history.reset() + d.syncStream.br = readerWrapper{r: r} + d.syncStream.inFrame = false + d.syncStream.enabled = true + d.syncStream.decodedFrame = 0 + return nil } -// errEndOfStream indicates that everything from the stream was read. -var errEndOfStream = errors.New("end-of-stream") - // Create Decoder: -// Spawn n block decoders. These accept tasks to decode a block. -// Create goroutine that handles stream processing, this will send history to decoders as they are available. -// Decoders update the history as they decode. -// When a block is returned: -// a) history is sent to the next decoder, -// b) content written to CRC. -// c) return data to WRITER. -// d) wait for next block to return data. -// Once WRITTEN, the decoders reused by the writer frame decoder for re-use. -func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { +// ASYNC: +// Spawn 4 go routines. +// 0: Read frames and decode blocks. +// 1: Decode block and literals. Receives hufftree and seqdecs, returns seqdecs and huff tree. +// 2: Wait for recentOffsets if needed. Decode sequences, send recentOffsets. +// 3: Wait for stream history, execute sequences, send stream history. +func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) { defer d.streamWg.Done() - frame := newFrameDec(d.o) - for stream := range inStream { - if debug { - println("got new stream") + br := readerWrapper{r: r} + + var seqPrepare = make(chan *blockDec, d.o.concurrent) + var seqDecode = make(chan *blockDec, d.o.concurrent) + var seqExecute = make(chan *blockDec, d.o.concurrent) + + // Async 1: Prepare blocks... + go func() { + var hist history + var hasErr bool + for block := range seqPrepare { + if hasErr { + if block != nil { + seqDecode <- block + } + continue + } + if block.async.newHist != nil { + if debugDecoder { + println("Async 1: new history") + } + hist.reset() + if block.async.newHist.dict != nil { + hist.setDict(block.async.newHist.dict) + } + } + if block.err != nil || block.Type != blockTypeCompressed { + hasErr = block.err != nil + seqDecode <- block + continue + } + + remain, err := block.decodeLiterals(block.data, &hist) + block.err = err + hasErr = block.err != nil + if err == nil { + block.async.literals = hist.decoders.literals + block.async.seqData = remain + } else if debugDecoder { + println("decodeLiterals error:", err) + } + seqDecode <- block } - br := readerWrapper{r: stream.r} - decodeStream: - for { - frame.history.reset() - err := frame.reset(&br) - if debug && err != nil { - println("Frame decoder returned", err) + close(seqDecode) + }() + + // Async 2: Decode sequences... + go func() { + var hist history + var hasErr bool + + for block := range seqDecode { + if hasErr { + if block != nil { + seqExecute <- block + } + continue } - if err == nil && frame.DictionaryID != nil { - dict, ok := d.dicts[*frame.DictionaryID] - if !ok { - err = ErrUnknownDictionary - } else { - frame.history.setDict(&dict) + if block.async.newHist != nil { + if debugDecoder { + println("Async 2: new history, recent:", block.async.newHist.recentOffsets) + } + hist.decoders = block.async.newHist.decoders + hist.recentOffsets = block.async.newHist.recentOffsets + hist.windowSize = block.async.newHist.windowSize + if block.async.newHist.dict != nil { + hist.setDict(block.async.newHist.dict) } } - if err != nil { - stream.output <- decodeOutput{ - err: err, + if block.err != nil || block.Type != blockTypeCompressed { + hasErr = block.err != nil + seqExecute <- block + continue + } + + hist.decoders.literals = block.async.literals + block.err = block.prepareSequences(block.async.seqData, &hist) + if debugDecoder && block.err != nil { + println("prepareSequences returned:", block.err) + } + hasErr = block.err != nil + if block.err == nil { + block.err = block.decodeSequences(&hist) + if debugDecoder && block.err != nil { + println("decodeSequences returned:", block.err) } - break + hasErr = block.err != nil + // block.async.sequence = hist.decoders.seq[:hist.decoders.nSeqs] + block.async.seqSize = hist.decoders.seqSize } - if debug { - println("starting frame decoder") + seqExecute <- block + } + close(seqExecute) + }() + + var wg sync.WaitGroup + wg.Add(1) + + // Async 3: Execute sequences... + frameHistCache := d.frame.history.b + go func() { + var hist history + var decodedFrame uint64 + var fcs uint64 + var hasErr bool + for block := range seqExecute { + out := decodeOutput{err: block.err, d: block} + if block.err != nil || hasErr { + hasErr = true + output <- out + continue } + if block.async.newHist != nil { + if debugDecoder { + println("Async 3: new history") + } + hist.windowSize = block.async.newHist.windowSize + hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer + if block.async.newHist.dict != nil { + hist.setDict(block.async.newHist.dict) + } - // This goroutine will forward history between frames. - frame.frameDone.Add(1) - frame.initAsync() + if cap(hist.b) < hist.allocFrameBuffer { + if cap(frameHistCache) >= hist.allocFrameBuffer { + hist.b = frameHistCache + } else { + hist.b = make([]byte, 0, hist.allocFrameBuffer) + println("Alloc history sized", hist.allocFrameBuffer) + } + } + hist.b = hist.b[:0] + fcs = block.async.fcs + decodedFrame = 0 + } + do := decodeOutput{err: block.err, d: block} + switch block.Type { + case blockTypeRLE: + if debugDecoder { + println("add rle block length:", block.RLESize) + } - go frame.startDecoder(stream.output) - decodeFrame: - // Go through all blocks of the frame. - for { - dec := <-d.decoders - select { - case <-stream.cancel: - if !frame.sendErr(dec, io.EOF) { - // To not let the decoder dangle, send it back. - stream.output <- decodeOutput{d: dec} + if cap(block.dst) < int(block.RLESize) { + if block.lowMem { + block.dst = make([]byte, block.RLESize) + } else { + block.dst = make([]byte, maxBlockSize) } - break decodeStream - default: } - err := frame.next(dec) - switch err { - case io.EOF: - // End of current frame, no error - println("EOF on next block") - break decodeFrame - case nil: - continue - default: - println("block decoder returned", err) - break decodeStream + block.dst = block.dst[:block.RLESize] + v := block.data[0] + for i := range block.dst { + block.dst[i] = v + } + hist.append(block.dst) + do.b = block.dst + case blockTypeRaw: + if debugDecoder { + println("add raw block length:", len(block.data)) + } + hist.append(block.data) + do.b = block.data + case blockTypeCompressed: + if debugDecoder { + println("execute with history length:", len(hist.b), "window:", hist.windowSize) + } + hist.decoders.seqSize = block.async.seqSize + hist.decoders.literals = block.async.literals + do.err = block.executeSequences(&hist) + hasErr = do.err != nil + if debugDecoder && hasErr { + println("executeSequences returned:", do.err) + } + do.b = block.dst + } + if !hasErr { + decodedFrame += uint64(len(do.b)) + if decodedFrame > fcs { + println("fcs exceeded", block.Last, fcs, decodedFrame) + do.err = ErrFrameSizeExceeded + hasErr = true + } else if block.Last && fcs != fcsUnknown && decodedFrame != fcs { + do.err = ErrFrameSizeMismatch + hasErr = true + } else { + if debugDecoder { + println("fcs ok", block.Last, fcs, decodedFrame) + } + } + } + output <- do + } + close(output) + frameHistCache = hist.b + wg.Done() + if debugDecoder { + println("decoder goroutines finished") + } + }() + +decodeStream: + for { + frame := d.frame + if debugDecoder { + println("New frame...") + } + var historySent bool + frame.history.reset() + err := frame.reset(&br) + if debugDecoder && err != nil { + println("Frame decoder returned", err) + } + if err == nil && frame.DictionaryID != nil { + dict, ok := d.dicts[*frame.DictionaryID] + if !ok { + err = ErrUnknownDictionary + } else { + frame.history.setDict(&dict) + } + } + if err == nil && d.frame.WindowSize > d.o.maxWindowSize { + err = ErrDecoderSizeExceeded + } + if err != nil { + select { + case <-ctx.Done(): + case dec := <-d.decoders: + dec.sendErr(err) + seqPrepare <- dec + } + break decodeStream + } + + // Go through all blocks of the frame. + for { + var dec *blockDec + select { + case <-ctx.Done(): + break decodeStream + case dec = <-d.decoders: + // Once we have a decoder, we MUST return it. + } + err := frame.next(dec) + if !historySent { + h := frame.history + if debugDecoder { + println("Alloc History:", h.allocFrameBuffer) + } + dec.async.newHist = &h + dec.async.fcs = frame.FrameContentSize + historySent = true + } else { + dec.async.newHist = nil + } + if debugDecoder && err != nil { + println("next block returned error:", err) + } + dec.err = err + dec.checkCRC = nil + if dec.Last && frame.HasCheckSum && err == nil { + crc, err := frame.rawInput.readSmall(4) + if err != nil { + println("CRC missing?", err) + dec.err = err + } + var tmp [4]byte + copy(tmp[:], crc) + dec.checkCRC = tmp[:] + if debugDecoder { + println("found crc to check:", dec.checkCRC) } } - // All blocks have started decoding, check if there are more frames. - println("waiting for done") - frame.frameDone.Wait() - println("done waiting...") + err = dec.err + last := dec.Last + seqPrepare <- dec + if err != nil { + break decodeStream + } + if last { + break + } } - frame.frameDone.Wait() - println("Sending EOS") - stream.output <- decodeOutput{err: errEndOfStream} } + close(seqPrepare) + wg.Wait() + d.frame.history.b = frameHistCache } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index 284d384..c70e6fa 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -6,7 +6,6 @@ package zstd import ( "errors" - "fmt" "runtime" ) @@ -18,16 +17,22 @@ type decoderOptions struct { lowMem bool concurrent int maxDecodedSize uint64 + maxWindowSize uint64 dicts []dict + ignoreChecksum bool } func (o *decoderOptions) setDefault() { *o = decoderOptions{ // use less ram: true for now, but may change. - lowMem: true, - concurrent: runtime.GOMAXPROCS(0), + lowMem: true, + concurrent: runtime.GOMAXPROCS(0), + maxWindowSize: MaxWindowSize, } - o.maxDecodedSize = 1 << 63 + if o.concurrent > 4 { + o.concurrent = 4 + } + o.maxDecodedSize = 64 << 30 } // WithDecoderLowmem will set whether to use a lower amount of memory, @@ -36,16 +41,25 @@ func WithDecoderLowmem(b bool) DOption { return func(o *decoderOptions) error { o.lowMem = b; return nil } } -// WithDecoderConcurrency will set the concurrency, -// meaning the maximum number of decoders to run concurrently. -// The value supplied must be at least 1. -// By default this will be set to GOMAXPROCS. +// WithDecoderConcurrency sets the number of created decoders. +// When decoding block with DecodeAll, this will limit the number +// of possible concurrently running decodes. +// When decoding streams, this will limit the number of +// inflight blocks. +// When decoding streams and setting maximum to 1, +// no async decoding will be done. +// When a value of 0 is provided GOMAXPROCS will be used. +// By default this will be set to 4 or GOMAXPROCS, whatever is lower. func WithDecoderConcurrency(n int) DOption { return func(o *decoderOptions) error { - if n <= 0 { - return fmt.Errorf("Concurrency must be at least 1") + if n < 0 { + return errors.New("concurrency must be at least 1") + } + if n == 0 { + o.concurrent = runtime.GOMAXPROCS(0) + } else { + o.concurrent = n } - o.concurrent = n return nil } } @@ -53,15 +67,14 @@ func WithDecoderConcurrency(n int) DOption { // WithDecoderMaxMemory allows to set a maximum decoded size for in-memory // non-streaming operations or maximum window size for streaming operations. // This can be used to control memory usage of potentially hostile content. -// For streaming operations, the maximum window size is capped at 1<<30 bytes. -// Maximum and default is 1 << 63 bytes. +// Maximum is 1 << 63 bytes. Default is 64GiB. func WithDecoderMaxMemory(n uint64) DOption { return func(o *decoderOptions) error { if n == 0 { return errors.New("WithDecoderMaxMemory must be at least 1") } if n > 1<<63 { - return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63") + return errors.New("WithDecoderMaxmemory must be less than 1 << 63") } o.maxDecodedSize = n return nil @@ -82,3 +95,29 @@ func WithDecoderDicts(dicts ...[]byte) DOption { return nil } } + +// WithDecoderMaxWindow allows to set a maximum window size for decodes. +// This allows rejecting packets that will cause big memory usage. +// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting. +// If WithDecoderMaxMemory is set to a lower value, that will be used. +// Default is 512MB, Maximum is ~3.75 TB as per zstandard spec. +func WithDecoderMaxWindow(size uint64) DOption { + return func(o *decoderOptions) error { + if size < MinWindowSize { + return errors.New("WithMaxWindowSize must be at least 1KB, 1024 bytes") + } + if size > (1<<41)+7*(1<<38) { + return errors.New("WithMaxWindowSize must be less than (1<<41) + 7*(1<<38) ~ 3.75TB") + } + o.maxWindowSize = size + return nil + } +} + +// IgnoreChecksum allows to forcibly ignore checksum checking. +func IgnoreChecksum(b bool) DOption { + return func(o *decoderOptions) error { + o.ignoreChecksum = b + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index 8eb6f6b..a36ae83 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -13,14 +13,31 @@ import ( type dict struct { id uint32 - litDec *huff0.Scratch + litEnc *huff0.Scratch llDec, ofDec, mlDec sequenceDec - offsets [3]int - content []byte + //llEnc, ofEnc, mlEnc []*fseEncoder + offsets [3]int + content []byte } var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec} +// ID returns the dictionary id or 0 if d is nil. +func (d *dict) ID() uint32 { + if d == nil { + return 0 + } + return d.id +} + +// DictContentSize returns the dictionary content size or 0 if d is nil. +func (d *dict) DictContentSize() int { + if d == nil { + return 0 + } + return len(d.content) +} + // Load a dictionary as described in // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format func loadDict(b []byte) (*dict, error) { @@ -43,10 +60,11 @@ func loadDict(b []byte) (*dict, error) { // Read literal table var err error - d.litDec, b, err = huff0.ReadTable(b[8:], nil) + d.litEnc, b, err = huff0.ReadTable(b[8:], nil) if err != nil { return nil, err } + d.litEnc.Reuse = huff0.ReusePolicyMust br := byteReader{ b: b, @@ -64,7 +82,7 @@ func loadDict(b []byte) (*dict, error) { println("Transform table error:", err) return err } - if debug { + if debugDecoder || debugEncoder { println("Read table ok", "symbolLen:", dec.symbolLen) } // Set decoders as predefined so they aren't reused. diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go new file mode 100644 index 0000000..15ae8ee --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -0,0 +1,188 @@ +package zstd + +import ( + "fmt" + "math/bits" + + "github.com/klauspost/compress/zstd/internal/xxhash" +) + +const ( + dictShardBits = 6 +) + +type fastBase struct { + // cur is the offset at the start of hist + cur int32 + // maximum offset. Should be at least 2x block size. + maxMatchOff int32 + hist []byte + crc *xxhash.Digest + tmp [8]byte + blk *blockEnc + lastDictID uint32 + lowMem bool +} + +// CRC returns the underlying CRC writer. +func (e *fastBase) CRC() *xxhash.Digest { + return e.crc +} + +// AppendCRC will append the CRC to the destination slice and return it. +func (e *fastBase) AppendCRC(dst []byte) []byte { + crc := e.crc.Sum(e.tmp[:0]) + dst = append(dst, crc[7], crc[6], crc[5], crc[4]) + return dst +} + +// WindowSize returns the window size of the encoder, +// or a window size small enough to contain the input size, if > 0. +func (e *fastBase) WindowSize(size int64) int32 { + if size > 0 && size < int64(e.maxMatchOff) { + b := int32(1) << uint(bits.Len(uint(size))) + // Keep minimum window. + if b < 1024 { + b = 1024 + } + return b + } + return e.maxMatchOff +} + +// Block returns the current block. +func (e *fastBase) Block() *blockEnc { + return e.blk +} + +func (e *fastBase) addBlock(src []byte) int32 { + if debugAsserts && e.cur > bufferReset { + panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) + } + // check if we have space already + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.ensureHist(len(src)) + } else { + if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) { + panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff)) + } + // Move down + offset := int32(len(e.hist)) - e.maxMatchOff + copy(e.hist[0:e.maxMatchOff], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:e.maxMatchOff] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// ensureHist will ensure that history can keep at least this many bytes. +func (e *fastBase) ensureHist(n int) { + if cap(e.hist) >= n { + return + } + l := e.maxMatchOff + if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize { + l += maxCompressedBlockSize + } else { + l += e.maxMatchOff + } + // Make it at least 1MB. + if l < 1<<20 && !e.lowMem { + l = 1 << 20 + } + // Make it at least the requested size. + if l < int32(n) { + l = int32(n) + } + e.hist = make([]byte, 0, l) +} + +// useBlock will replace the block with the provided one, +// but transfer recent offsets from the previous. +func (e *fastBase) UseBlock(enc *blockEnc) { + enc.reset(e.blk) + e.blk = enc +} + +func (e *fastBase) matchlen(s, t int32, src []byte) int32 { + if debugAsserts { + if s < 0 { + err := fmt.Sprintf("s (%d) < 0", s) + panic(err) + } + if t < 0 { + err := fmt.Sprintf("s (%d) < 0", s) + panic(err) + } + if s-t > e.maxMatchOff { + err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) + panic(err) + } + if len(src)-int(s) > maxCompressedBlockSize { + panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) + } + } + a := src[s:] + b := src[t:] + b = b[:len(a)] + end := int32((len(a) >> 3) << 3) + for i := int32(0); i < end; i += 8 { + if diff := load6432(a, i) ^ load6432(b, i); diff != 0 { + return i + int32(bits.TrailingZeros64(diff)>>3) + } + } + + a = a[end:] + b = b[end:] + for i := range a { + if a[i] != b[i] { + return int32(i) + end + } + } + return int32(len(a)) + end +} + +// Reset the encoding table. +func (e *fastBase) resetBase(d *dict, singleBlock bool) { + if e.blk == nil { + e.blk = &blockEnc{lowMem: e.lowMem} + e.blk.init() + } else { + e.blk.reset(nil) + } + e.blk.initNewEncode() + if e.crc == nil { + e.crc = xxhash.New() + } else { + e.crc.Reset() + } + if d != nil { + low := e.lowMem + if singleBlock { + e.lowMem = true + } + e.ensureHist(d.DictContentSize() + maxCompressedBlockSize) + e.lowMem = low + } + + // We offset current position so everything will be out of reach. + // If above reset line, history will be purged. + if e.cur < bufferReset { + e.cur += e.maxMatchOff + int32(len(e.hist)) + } + e.hist = e.hist[:0] + if d != nil { + // Set offsets (currently not used) + for i, off := range d.offsets { + e.blk.recentOffsets[i] = uint32(off) + e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i] + } + // Transfer litenc. + e.blk.dictLitEnc = d.litEnc + e.hist = append(e.hist, d.content...) + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go new file mode 100644 index 0000000..96028ec --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -0,0 +1,558 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "bytes" + "fmt" + + "github.com/klauspost/compress" +) + +const ( + bestLongTableBits = 22 // Bits used in the long match table + bestLongTableSize = 1 << bestLongTableBits // Size of the table + bestLongLen = 8 // Bytes used for table hash + + // Note: Increasing the short table bits or making the hash shorter + // can actually lead to compression degradation since it will 'steal' more from the + // long match table and match offsets are quite big. + // This greatly depends on the type of input. + bestShortTableBits = 18 // Bits used in the short match table + bestShortTableSize = 1 << bestShortTableBits // Size of the table + bestShortLen = 4 // Bytes used for table hash + +) + +type match struct { + offset int32 + s int32 + length int32 + rep int32 + est int32 +} + +const highScore = 25000 + +// estBits will estimate output bits from predefined tables. +func (m *match) estBits(bitsPerByte int32) { + mlc := mlCode(uint32(m.length - zstdMinMatch)) + var ofc uint8 + if m.rep < 0 { + ofc = ofCode(uint32(m.s-m.offset) + 3) + } else { + ofc = ofCode(uint32(m.rep)) + } + // Cost, excluding + ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc] + + // Add cost of match encoding... + m.est = int32(ofTT.outBits + mlTT.outBits) + m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16) + // Subtract savings compared to literal encoding... + m.est -= (m.length * bitsPerByte) >> 10 + if m.est > 0 { + // Unlikely gain.. + m.length = 0 + m.est = highScore + } +} + +// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). +type bestFastEncoder struct { + fastBase + table [bestShortTableSize]prevEntry + longTable [bestLongTableSize]prevEntry + dictTable []prevEntry + dictLongTable []prevEntry +} + +// Encode improves compression... +func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 4 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = prevEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + v2 := e.table[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.table[i] = prevEntry{ + offset: v, + prev: v2, + } + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Use this to estimate literal cost. + // Scaled by 10 bits. + bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src)) + // Huffman can never go < 1 bit/byte + if bitsPerByte < 1024 { + bitsPerByte = 1024 + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + const kSearchStrength = 10 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + offset3 := int32(blk.recentOffsets[2]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + _ = addLiterals + + if debugEncoder { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + bestOf := func(a, b match) match { + if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 { + return a + } + return b + } + const goodEnough = 100 + + nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) + nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + matchAt := func(offset int32, s int32, first uint32, rep int32) match { + if s-offset >= e.maxMatchOff || load3232(src, offset) != first { + return match{s: s, est: highScore} + } + if debugAsserts { + if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { + panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) + } + } + m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} + m.estBits(bitsPerByte) + return m + } + + best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)) + + if canRepeat && best.length < goodEnough { + cv32 := uint32(cv >> 8) + spp := s + 1 + best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) + best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) + best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) + if best.length > 0 { + cv32 = uint32(cv >> 24) + spp += 2 + best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) + best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) + best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) + } + } + // Load next and check... + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset} + e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset} + + // Look far ahead, unless we have a really long match already... + if best.length < goodEnough { + // No match found, move forward on input, no need to check forward... + if best.length < 4 { + s += 1 + (s-nextEmit)>>(kSearchStrength-1) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + continue + } + + s++ + candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)] + cv = load6432(src, s) + cv2 := load6432(src, s+1) + candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)] + candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] + + // Short at s+1 + best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + // Long at s+1, s+2 + best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) + best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) + if false { + // Short at s+3. + // Too often worse... + best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)) + } + // See if we can find a better match by checking where the current best ends. + // Use that offset to see if we can find a better full match. + if sAt := best.s + best.length; sAt < sLimit { + nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) + candidateEnd := e.longTable[nextHashL] + if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { + bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) + if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 { + bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1)) + } + best = bestEnd + } + } + } + + if debugAsserts { + if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) { + panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length])) + } + } + + // We have a match, we can store the forward value + if best.rep > 0 { + s = best.s + var seq seq + seq.matchLen = uint32(best.length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := best.s + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + repIndex := best.offset + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = uint32(best.rep) + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + s = best.s + best.length + + nextEmit = s + if s >= sLimit { + if debugEncoder { + println("repeat ended", s, best.length) + + } + break encodeLoop + } + // Index skipped... + off := index0 + e.cur + for index0 < s-1 { + cv0 := load6432(src, index0) + h0 := hashLen(cv0, bestLongTableBits, bestLongLen) + h1 := hashLen(cv0, bestShortTableBits, bestShortLen) + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} + off++ + index0++ + } + switch best.rep { + case 2: + offset1, offset2 = offset2, offset1 + case 3: + offset1, offset2, offset3 = offset3, offset1, offset2 + } + cv = load6432(src, s) + continue + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + s = best.s + t := best.offset + offset1, offset2, offset3 = s-t, offset1, offset2 + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := best.length + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + // every entry + for index0 < s-1 { + cv0 := load6432(src, index0) + h0 := hashLen(cv0, bestLongTableBits, bestLongLen) + h1 := hashLen(cv0, bestShortTableBits, bestShortLen) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} + index0++ + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) + nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + blk.recentOffsets[2] = uint32(offset3) + if debugEncoder { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.ensureHist(len(src)) + e.Encode(blk, src) +} + +// Reset will reset and set a dictionary if not nil +func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d == nil { + return + } + // Init or copy dict table + if len(e.dictTable) != len(e.table) || d.id != e.lastDictID { + if len(e.dictTable) != len(e.table) { + e.dictTable = make([]prevEntry, len(e.table)) + } + end := int32(len(d.content)) - 8 + e.maxMatchOff + for i := e.maxMatchOff; i < end; i += 4 { + const hashLog = bestShortTableBits + + cv := load6432(d.content, i-e.maxMatchOff) + nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4 + nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5 + nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6 + nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7 + e.dictTable[nextHash] = prevEntry{ + prev: e.dictTable[nextHash].offset, + offset: i, + } + e.dictTable[nextHash1] = prevEntry{ + prev: e.dictTable[nextHash1].offset, + offset: i + 1, + } + e.dictTable[nextHash2] = prevEntry{ + prev: e.dictTable[nextHash2].offset, + offset: i + 2, + } + e.dictTable[nextHash3] = prevEntry{ + prev: e.dictTable[nextHash3].offset, + offset: i + 3, + } + } + e.lastDictID = d.id + } + + // Init or copy dict table + if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID { + if len(e.dictLongTable) != len(e.longTable) { + e.dictLongTable = make([]prevEntry, len(e.longTable)) + } + if len(d.content) >= 8 { + cv := load6432(d.content, 0) + h := hashLen(cv, bestLongTableBits, bestLongLen) + e.dictLongTable[h] = prevEntry{ + offset: e.maxMatchOff, + prev: e.dictLongTable[h].offset, + } + + end := int32(len(d.content)) - 8 + e.maxMatchOff + off := 8 // First to read + for i := e.maxMatchOff + 1; i < end; i++ { + cv = cv>>8 | (uint64(d.content[off]) << 56) + h := hashLen(cv, bestLongTableBits, bestLongLen) + e.dictLongTable[h] = prevEntry{ + offset: i, + prev: e.dictLongTable[h].offset, + } + off++ + } + } + e.lastDictID = d.id + } + // Reset table to initial state + copy(e.longTable[:], e.dictLongTable) + + e.cur = e.maxMatchOff + // Reset table to initial state + copy(e.table[:], e.dictTable) +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index c120d90..602c05e 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -9,6 +9,7 @@ import "fmt" const ( betterLongTableBits = 19 // Bits used in the long match table betterLongTableSize = 1 << betterLongTableBits // Size of the table + betterLongLen = 8 // Bytes used for table hash // Note: Increasing the short table bits or making the hash shorter // can actually lead to compression degradation since it will 'steal' more from the @@ -16,6 +17,13 @@ const ( // This greatly depends on the type of input. betterShortTableBits = 13 // Bits used in the short match table betterShortTableSize = 1 << betterShortTableBits // Size of the table + betterShortLen = 5 // Bytes used for table hash + + betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table + betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard + + betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table + betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard ) type prevEntry struct { @@ -35,6 +43,15 @@ type betterFastEncoder struct { longTable [betterLongTableSize]prevEntry } +type betterFastEncoderDict struct { + betterFastEncoder + dictTable []tableEntry + dictLongTable []prevEntry + shortTableShardDirty [betterShortTableShardCnt]bool + longTableShardDirty [betterLongTableShardCnt]bool + allDirty bool +} + // Encode improves compression... func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { const ( @@ -123,7 +140,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -139,8 +156,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hash5(cv, betterShortTableBits) - nextHashL := hash8(cv, betterLongTableBits) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -189,7 +206,7 @@ encodeLoop: nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -199,10 +216,10 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 } cv = load6432(src, s) @@ -249,7 +266,7 @@ encodeLoop: s += lenght + repOff2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -260,10 +277,10 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 } cv = load6432(src, s) @@ -338,7 +355,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, betterLongTableBits) + nextHashL = hashLen(cv, betterLongTableBits, betterLongLen) candidateL = e.longTable[nextHashL] coffsetL = candidateL.offset - e.cur @@ -397,8 +414,41 @@ encodeLoop: cv = load6432(src, s) } - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. + // Try to find a better match by searching for a long match at the end of the current best match + if s+matched < sLimit { + nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } + + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. offset2 = offset1 offset1 = s - t @@ -447,10 +497,10 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 } @@ -468,8 +518,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv, betterShortTableBits) - nextHashL := hash8(cv, betterLongTableBits) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -505,7 +555,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } @@ -514,5 +564,674 @@ encodeLoop: // Most notable difference is that src will not be copied for history and // we do not need to check for max match length. func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.ensureHist(len(src)) e.Encode(blk, src) } + +// Encode improves compression... +func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + e.allDirty = true + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.allDirty = true + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debugEncoder { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + e.markShortShardDirty(nextHashS) + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debugEncoder { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.markLongShardDirty(h0) + h1 := hashLen(cv1, betterShortTableBits, betterShortLen) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debugEncoder { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.markLongShardDirty(h0) + h1 := hashLen(cv1, betterShortTableBits, betterShortLen) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hashLen(cv, betterLongTableBits, betterLongLen) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + e.markLongShardDirty(nextHashL) + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + // Try to find a better match by searching for a long match at the end of the current best match + if s+matched < sLimit { + nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } + + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.markLongShardDirty(h0) + h1 := hashLen(cv1, betterShortTableBits, betterShortLen) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShortShardDirty(nextHashS) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debugEncoder { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d != nil { + panic("betterFastEncoder: Reset with dict") + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d == nil { + return + } + // Init or copy dict table + if len(e.dictTable) != len(e.table) || d.id != e.lastDictID { + if len(e.dictTable) != len(e.table) { + e.dictTable = make([]tableEntry, len(e.table)) + } + end := int32(len(d.content)) - 8 + e.maxMatchOff + for i := e.maxMatchOff; i < end; i += 4 { + const hashLog = betterShortTableBits + + cv := load6432(d.content, i-e.maxMatchOff) + nextHash := hashLen(cv, hashLog, betterShortLen) // 0 -> 4 + nextHash1 := hashLen(cv>>8, hashLog, betterShortLen) // 1 -> 5 + nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6 + nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7 + e.dictTable[nextHash] = tableEntry{ + val: uint32(cv), + offset: i, + } + e.dictTable[nextHash1] = tableEntry{ + val: uint32(cv >> 8), + offset: i + 1, + } + e.dictTable[nextHash2] = tableEntry{ + val: uint32(cv >> 16), + offset: i + 2, + } + e.dictTable[nextHash3] = tableEntry{ + val: uint32(cv >> 24), + offset: i + 3, + } + } + e.lastDictID = d.id + e.allDirty = true + } + + // Init or copy dict table + if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID { + if len(e.dictLongTable) != len(e.longTable) { + e.dictLongTable = make([]prevEntry, len(e.longTable)) + } + if len(d.content) >= 8 { + cv := load6432(d.content, 0) + h := hashLen(cv, betterLongTableBits, betterLongLen) + e.dictLongTable[h] = prevEntry{ + offset: e.maxMatchOff, + prev: e.dictLongTable[h].offset, + } + + end := int32(len(d.content)) - 8 + e.maxMatchOff + off := 8 // First to read + for i := e.maxMatchOff + 1; i < end; i++ { + cv = cv>>8 | (uint64(d.content[off]) << 56) + h := hashLen(cv, betterLongTableBits, betterLongLen) + e.dictLongTable[h] = prevEntry{ + offset: i, + prev: e.dictLongTable[h].offset, + } + off++ + } + } + e.lastDictID = d.id + e.allDirty = true + } + + // Reset table to initial state + { + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.shortTableShardDirty { + if e.shortTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + const shardCnt = betterShortTableShardCnt + const shardSize = betterShortTableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.table[:], e.dictTable) + for i := range e.shortTableShardDirty { + e.shortTableShardDirty[i] = false + } + } else { + for i := range e.shortTableShardDirty { + if !e.shortTableShardDirty[i] { + continue + } + + copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + e.shortTableShardDirty[i] = false + } + } + } + { + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.shortTableShardDirty { + if e.shortTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + const shardCnt = betterLongTableShardCnt + const shardSize = betterLongTableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.longTable[:], e.dictLongTable) + for i := range e.longTableShardDirty { + e.longTableShardDirty[i] = false + } + } else { + for i := range e.longTableShardDirty { + if !e.longTableShardDirty[i] { + continue + } + + copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize]) + e.longTableShardDirty[i] = false + } + } + } + e.cur = e.maxMatchOff + e.allDirty = false +} + +func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) { + e.longTableShardDirty[entryNum/betterLongTableShardSize] = true +} + +func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) { + e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 50276bc..d6b3104 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -10,10 +10,16 @@ const ( dFastLongTableBits = 17 // Bits used in the long match table dFastLongTableSize = 1 << dFastLongTableBits // Size of the table dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + dFastLongLen = 8 // Bytes used for table hash + + dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table + dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard dFastShortTableBits = tableBits // Bits used in the short match table dFastShortTableSize = 1 << dFastShortTableBits // Size of the table dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + dFastShortLen = 5 // Bytes used for table hash + ) type doubleFastEncoder struct { @@ -21,6 +27,13 @@ type doubleFastEncoder struct { longTable [dFastLongTableSize]tableEntry } +type doubleFastEncoderDict struct { + fastEncoderDict + longTable [dFastLongTableSize]tableEntry + dictLongTable []tableEntry + longTableShardDirty [dLongTableShardCnt]bool +} + // Encode mimmics functionality in zstd_dfast.c func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { const ( @@ -99,7 +112,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -114,8 +127,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -160,7 +173,7 @@ encodeLoop: s += lenght + repOff nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -198,7 +211,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, dFastLongTableBits) + nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL = e.longTable[nextHashL] coffsetL = s - (candidateL.offset - e.cur) + checkAt @@ -294,16 +307,16 @@ encodeLoop: cv1 := load6432(src, index1) te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - e.longTable[hash8(cv0, dFastLongTableBits)] = te0 - e.longTable[hash8(cv1, dFastLongTableBits)] = te1 + e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0 + e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1 cv0 >>= 8 cv1 >>= 8 te0.offset++ te1.offset++ te0.val = uint32(cv0) te1.val = uint32(cv1) - e.table[hash5(cv0, dFastShortTableBits)] = te0 - e.table[hash5(cv1, dFastShortTableBits)] = te1 + e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0 + e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1 cv = load6432(src, s) @@ -320,8 +333,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -358,7 +371,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } @@ -417,7 +430,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -426,8 +439,8 @@ encodeLoop: var t int32 for { - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -473,7 +486,7 @@ encodeLoop: s += length + repOff nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, length) } @@ -494,7 +507,7 @@ encodeLoop: // but the likelihood of both the first 4 bytes and the hash matching should be enough. t = candidateL.offset - e.cur if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur)) } if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") @@ -511,7 +524,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, dFastLongTableBits) + nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL = e.longTable[nextHashL] coffsetL = s - (candidateL.offset - e.cur) + checkAt @@ -604,16 +617,16 @@ encodeLoop: cv1 := load6432(src, index1) te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - e.longTable[hash8(cv0, dFastLongTableBits)] = te0 - e.longTable[hash8(cv1, dFastLongTableBits)] = te1 + e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0 + e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1 cv0 >>= 8 cv1 >>= 8 te0.offset++ te1.offset++ te0.val = uint32(cv0) te1.val = uint32(cv1) - e.table[hash5(cv0, dFastShortTableBits)] = te0 - e.table[hash5(cv1, dFastShortTableBits)] = te1 + e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0 + e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1 cv = load6432(src, s) @@ -630,8 +643,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv1>>8, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -667,7 +680,7 @@ encodeLoop: blk.literals = append(blk.literals, src[nextEmit:]...) blk.extraLits = len(src) - int(nextEmit) } - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } @@ -676,3 +689,436 @@ encodeLoop: e.cur += int32(len(src)) } } + +// Encode will encode the content, with a dictionary if initialized for it. +func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = tableEntry{} + } + e.markAllShardsDirty() + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.longTable[i].offset = v + } + e.markAllShardsDirty() + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debugEncoder { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = entry + e.markShardDirty(nextHashS) + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += lenght + repOff + nextEmit = s + if s >= sLimit { + if debugEncoder { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + } + // Find the offsets of our two matches. + coffsetL := s - (candidateL.offset - e.cur) + coffsetS := s - (candidateS.offset - e.cur) + + // Check if we have a long match. + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + // Check if we have a short match. + if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) + candidateL = e.longTable[nextHashL] + coffsetL = s - (candidateL.offset - e.cur) + checkAt + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} + e.markLongShardDirty(nextHashL) + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + s += checkAt + if debugMatches { + println("long match (after short)") + } + break + } + + t = candidateS.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + l := e.matchlen(s+4, t+4, src) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) and start+2 (short) + index0 := s - l + 1 + // Index match end-2 (long) and end-1 (short) + index1 := s - 2 + + cv0 := load6432(src, index0) + cv1 := load6432(src, index1) + te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} + te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} + longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen) + longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen) + e.longTable[longHash1] = te0 + e.longTable[longHash2] = te1 + e.markLongShardDirty(longHash1) + e.markLongShardDirty(longHash2) + cv0 >>= 8 + cv1 >>= 8 + te0.offset++ + te1.offset++ + te0.val = uint32(cv0) + te1.val = uint32(cv1) + hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen) + hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen) + e.table[hashVal1] = te0 + e.markShardDirty(hashVal1) + e.table[hashVal2] = te1 + e.markShardDirty(hashVal2) + + cv = load6432(src, s) + + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = entry + e.markShardDirty(nextHashS) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debugEncoder { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } + // If we encoded more than 64K mark all dirty. + if len(src) > 64<<10 { + e.markAllShardsDirty() + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) { + e.fastEncoder.Reset(d, singleBlock) + if d != nil { + panic("doubleFastEncoder: Reset with dict not supported") + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { + allDirty := e.allDirty + e.fastEncoderDict.Reset(d, singleBlock) + if d == nil { + return + } + + // Init or copy dict table + if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID { + if len(e.dictLongTable) != len(e.longTable) { + e.dictLongTable = make([]tableEntry, len(e.longTable)) + } + if len(d.content) >= 8 { + cv := load6432(d.content, 0) + e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{ + val: uint32(cv), + offset: e.maxMatchOff, + } + end := int32(len(d.content)) - 8 + e.maxMatchOff + for i := e.maxMatchOff + 1; i < end; i++ { + cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56) + e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{ + val: uint32(cv), + offset: i, + } + } + } + e.lastDictID = d.id + e.allDirty = true + } + // Reset table to initial state + e.cur = e.maxMatchOff + + dirtyShardCnt := 0 + if !allDirty { + for i := range e.longTableShardDirty { + if e.longTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + + if allDirty || dirtyShardCnt > dLongTableShardCnt/2 { + copy(e.longTable[:], e.dictLongTable) + for i := range e.longTableShardDirty { + e.longTableShardDirty[i] = false + } + return + } + for i := range e.longTableShardDirty { + if !e.longTableShardDirty[i] { + continue + } + + copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) + e.longTableShardDirty[i] = false + } +} + +func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) { + e.longTableShardDirty[entryNum/dLongTableShardSize] = true +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 4104b45..f51ab52 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -6,17 +6,16 @@ package zstd import ( "fmt" - "math" - "math/bits" - - "github.com/klauspost/compress/zstd/internal/xxhash" ) const ( - tableBits = 15 // Bits used in the table - tableSize = 1 << tableBits // Size of the table - tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. - maxMatchLength = 131074 + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table + tableShardSize = tableSize / tableShardCnt // Size of an individual shard + tableFastHashLen = 6 + tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + maxMatchLength = 131074 ) type tableEntry struct { @@ -24,51 +23,16 @@ type tableEntry struct { offset int32 } -type fastBase struct { - // cur is the offset at the start of hist - cur int32 - // maximum offset. Should be at least 2x block size. - maxMatchOff int32 - hist []byte - crc *xxhash.Digest - tmp [8]byte - blk *blockEnc -} - type fastEncoder struct { fastBase table [tableSize]tableEntry } -// CRC returns the underlying CRC writer. -func (e *fastBase) CRC() *xxhash.Digest { - return e.crc -} - -// AppendCRC will append the CRC to the destination slice and return it. -func (e *fastBase) AppendCRC(dst []byte) []byte { - crc := e.crc.Sum(e.tmp[:0]) - dst = append(dst, crc[7], crc[6], crc[5], crc[4]) - return dst -} - -// WindowSize returns the window size of the encoder, -// or a window size small enough to contain the input size, if > 0. -func (e *fastBase) WindowSize(size int) int32 { - if size > 0 && size < int(e.maxMatchOff) { - b := int32(1) << uint(bits.Len(uint(size))) - // Keep minimum window. - if b < 1024 { - b = 1024 - } - return b - } - return e.maxMatchOff -} - -// Block returns the current block. -func (e *fastBase) Block() *blockEnc { - return e.blk +type fastEncoderDict struct { + fastEncoder + dictTable []tableEntry + tableShardDirty [tableShardCnt]bool + allDirty bool } // Encode mimmics functionality in zstd_fast.c @@ -121,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { // TEMPLATE const hashLog = tableBits // seems global, but would be nice to tweak. - const kSearchStrength = 8 + const kSearchStrength = 6 // nextEmit is where in src the next emitLiteral should start from. nextEmit := s @@ -138,7 +102,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -157,8 +121,8 @@ encodeLoop: panic("offset0 was 0") } - nextHash := hash6(cv, hashLog) - nextHash2 := hash6(cv>>8, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) + nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) candidate := e.table[nextHash] candidate2 := e.table[nextHash2] repIndex := s - offset1 + 2 @@ -170,20 +134,7 @@ encodeLoop: // Consider history as well. var seq seq var length int32 - // length = 4 + e.matchlen(s+6, repIndex+4, src) - { - a := src[s+6:] - b := src[repIndex+4:] - endI := len(a) & (math.MaxInt32 - 7) - length = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } - + length = 4 + e.matchlen(s+6, repIndex+4, src) seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. @@ -213,7 +164,7 @@ encodeLoop: s += length + 2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, length) } @@ -270,20 +221,7 @@ encodeLoop: } // Extend the 4-byte match as long as possible. - //l := e.matchlen(s+4, t+4, src) + 4 - var l int32 - { - a := src[s+4:] - b := src[t+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := e.matchlen(s+4, t+4, src) + 4 // Extend backwards tMin := s - e.maxMatchOff @@ -320,23 +258,10 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - //l := 4 + e.matchlen(s+4, o2+4, src) - var l int32 - { - a := src[s+4:] - b := src[o2+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := 4 + e.matchlen(s+4, o2+4, src) // Store this, since we have it. - nextHash := hash6(cv, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} seq.matchLen = uint32(l) - zstdMinMatch seq.litLen = 0 @@ -365,7 +290,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } @@ -378,7 +303,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { inputMargin = 8 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debug { + if debugEncoder { if len(src) > maxBlockSize { panic("src too big") } @@ -409,7 +334,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { // TEMPLATE const hashLog = tableBits // seems global, but would be nice to tweak. - const kSearchStrength = 8 + const kSearchStrength = 6 // nextEmit is where in src the next emitLiteral should start from. nextEmit := s @@ -426,7 +351,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -440,8 +365,8 @@ encodeLoop: // By not using them for the first 3 matches for { - nextHash := hash6(cv, hashLog) - nextHash2 := hash6(cv>>8, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) + nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) candidate := e.table[nextHash] candidate2 := e.table[nextHash2] repIndex := s - offset1 + 2 @@ -452,21 +377,7 @@ encodeLoop: if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - // length := 4 + e.matchlen(s+6, repIndex+4, src) - // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) - var length int32 - { - a := src[s+6:] - b := src[repIndex+4:] - endI := len(a) & (math.MaxInt32 - 7) - length = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + length := 4 + e.matchlen(s+6, repIndex+4, src) seq.matchLen = uint32(length - zstdMinMatch) @@ -497,7 +408,7 @@ encodeLoop: s += length + 2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, length) } @@ -556,21 +467,7 @@ encodeLoop: panic(fmt.Sprintf("t (%d) < 0 ", t)) } // Extend the 4-byte match as long as possible. - //l := e.matchlenNoHist(s+4, t+4, src) + 4 - // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 - var l int32 - { - a := src[s+4:] - b := src[t+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := e.matchlen(s+4, t+4, src) + 4 // Extend backwards tMin := s - e.maxMatchOff @@ -607,24 +504,10 @@ encodeLoop: if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - //l := 4 + e.matchlenNoHist(s+4, o2+4, src) - // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) - var l int32 - { - a := src[s+4:] - b := src[o2+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := 4 + e.matchlen(s+4, o2+4, src) // Store this, since we have it. - nextHash := hash6(cv, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} seq.matchLen = uint32(l) - zstdMinMatch seq.litLen = 0 @@ -651,7 +534,7 @@ encodeLoop: blk.literals = append(blk.literals, src[nextEmit:]...) blk.extraLits = len(src) - int(nextEmit) } - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } // We do not store history, so we must offset e.cur to avoid false matches for next user. @@ -660,96 +543,356 @@ encodeLoop: } } -func (e *fastBase) addBlock(src []byte) int32 { - if debugAsserts && e.cur > bufferReset { - panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) +// Encode will encode the content, with a dictionary if initialized for it. +func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + inputMargin = 8 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if e.allDirty || len(src) > 32<<10 { + e.fastEncoder.Encode(blk, src) + e.allDirty = true + return } - // check if we have space already - if len(e.hist)+len(src) > cap(e.hist) { - if cap(e.hist) == 0 { - l := e.maxMatchOff * 2 - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} } - e.hist = make([]byte, 0, l) - } else { - if cap(e.hist) < int(e.maxMatchOff*2) { - panic("unexpected buffer size") + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff } - // Move down - offset := int32(len(e.hist)) - e.maxMatchOff - copy(e.hist[0:e.maxMatchOff], e.hist[offset:]) - e.cur += offset - e.hist = e.hist[:e.maxMatchOff] + e.table[i].offset = v } + e.cur = e.maxMatchOff + break } - s := int32(len(e.hist)) - e.hist = append(e.hist, src...) - return s -} -// useBlock will replace the block with the provided one, -// but transfer recent offsets from the previous. -func (e *fastBase) UseBlock(enc *blockEnc) { - enc.reset(e.blk) - e.blk = enc -} + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } -func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { - // Extend the match to be as long as possible. - return int32(matchLen(src[s:], src[t:])) -} + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 2. + const stepSize = 2 -func (e *fastBase) matchlen(s, t int32, src []byte) int32 { - if debugAsserts { - if s < 0 { - err := fmt.Sprintf("s (%d) < 0", s) - panic(err) + // TEMPLATE + const hashLog = tableBits + // seems global, but would be nice to tweak. + const kSearchStrength = 7 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return } - if t < 0 { - err := fmt.Sprintf("s (%d) < 0", s) - panic(err) + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debugEncoder { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // t will contain the match offset when we find one. + // When existing the search loop, we have already checked 4 bytes. + var t int32 + + // We will not use repeat offsets across blocks. + // By not using them for the first 3 matches + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHash := hashLen(cv, hashLog, tableFastHashLen) + nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) + candidate := e.table[nextHash] + candidate2 := e.table[nextHash2] + repIndex := s - offset1 + 2 + + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShardDirty(nextHash) + e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} + e.markShardDirty(nextHash2) + + if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { + // Consider history as well. + var seq seq + var length int32 + length = 4 + e.matchlen(s+6, repIndex+4, src) + + seq.matchLen = uint32(length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + 2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + sMin := s - e.maxMatchOff + if sMin < 0 { + sMin = 0 + } + for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += length + 2 + nextEmit = s + if s >= sLimit { + if debugEncoder { + println("repeat ended", s, length) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + coffset0 := s - (candidate.offset - e.cur) + coffset1 := s - (candidate2.offset - e.cur) + 1 + if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { + // found a regular match + t = candidate.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + break + } + + if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { + // found a regular match + t = candidate2.offset - e.cur + s++ + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + break + } + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + // A 4-byte match has been found. We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + l := e.matchlen(s+4, t+4, src) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence. + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + // Don't use repeat offsets + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) } - if s-t > e.maxMatchOff { - err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) - panic(err) + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop } - if len(src)-int(s) > maxCompressedBlockSize { - panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) + cv = load6432(src, s) + + // Check offset 2 + if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + // Store this, since we have it. + nextHash := hashLen(cv, hashLog, tableFastHashLen) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShardDirty(nextHash) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + break encodeLoop + } + // Prepare next loop. + cv = load6432(src, s) } } - // Extend the match to be as long as possible. - return int32(matchLen(src[s:], src[t:])) + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debugEncoder { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *fastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d != nil { + panic("fastEncoder: Reset with dict") + } } -// Reset the encoding table. -func (e *fastBase) Reset(singleBlock bool) { - if e.blk == nil { - e.blk = &blockEnc{} - e.blk.init() - } else { - e.blk.reset(nil) +// ResetDict will reset and set a dictionary if not nil +func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d == nil { + return + } + + // Init or copy dict table + if len(e.dictTable) != len(e.table) || d.id != e.lastDictID { + if len(e.dictTable) != len(e.table) { + e.dictTable = make([]tableEntry, len(e.table)) + } + if true { + end := e.maxMatchOff + int32(len(d.content)) - 8 + for i := e.maxMatchOff; i < end; i += 3 { + const hashLog = tableBits + + cv := load6432(d.content, i-e.maxMatchOff) + nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 5 + nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 6 + nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7 + e.dictTable[nextHash] = tableEntry{ + val: uint32(cv), + offset: i, + } + e.dictTable[nextHash1] = tableEntry{ + val: uint32(cv >> 8), + offset: i + 1, + } + e.dictTable[nextHash2] = tableEntry{ + val: uint32(cv >> 16), + offset: i + 2, + } + } + } + e.lastDictID = d.id + e.allDirty = true } - e.blk.initNewEncode() - if e.crc == nil { - e.crc = xxhash.New() - } else { - e.crc.Reset() + + e.cur = e.maxMatchOff + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.tableShardDirty { + if e.tableShardDirty[i] { + dirtyShardCnt++ + } + } } - if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) { - l := e.maxMatchOff * 2 - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 + + const shardCnt = tableShardCnt + const shardSize = tableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.table[:], e.dictTable) + for i := range e.tableShardDirty { + e.tableShardDirty[i] = false } - e.hist = make([]byte, 0, l) + e.allDirty = false + return } - // We offset current position so everything will be out of reach. - // If above reset line, history will be purged. - if e.cur < bufferReset { - e.cur += e.maxMatchOff + int32(len(e.hist)) + for i := range e.tableShardDirty { + if !e.tableShardDirty[i] { + continue + } + + copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + e.tableShardDirty[i] = false } - e.hist = e.hist[:0] + e.allDirty = false +} + +func (e *fastEncoderDict) markAllShardsDirty() { + e.allDirty = true +} + +func (e *fastEncoderDict) markShardDirty(entryNum uint32) { + e.tableShardDirty[entryNum/tableShardSize] = true } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_params.go b/vendor/github.com/klauspost/compress/zstd/enc_params.go deleted file mode 100644 index d874116..0000000 --- a/vendor/github.com/klauspost/compress/zstd/enc_params.go +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -/* -// encParams are not really used, just here for reference. -type encParams struct { - // largest match distance : larger == more compression, more memory needed during decompression - windowLog uint8 - - // fully searched segment : larger == more compression, slower, more memory (useless for fast) - chainLog uint8 - - // dispatch table : larger == faster, more memory - hashLog uint8 - - // < nb of searches : larger == more compression, slower - searchLog uint8 - - // < match length searched : larger == faster decompression, sometimes less compression - minMatch uint8 - - // acceptable match size for optimal parser (only) : larger == more compression, slower - targetLength uint32 - - // see ZSTD_strategy definition above - strategy strategy -} - -// strategy defines the algorithm to use when generating sequences. -type strategy uint8 - -const ( - // Compression strategies, listed from fastest to strongest - strategyFast strategy = iota + 1 - strategyDfast - strategyGreedy - strategyLazy - strategyLazy2 - strategyBtlazy2 - strategyBtopt - strategyBtultra - strategyBtultra2 - // note : new strategies _might_ be added in the future. - // Only the order (from fast to strong) is guaranteed - -) - -var defEncParams = [4][]encParams{ - { // "default" - for any srcSize > 256 KB - // W, C, H, S, L, TL, strat - {19, 12, 13, 1, 6, 1, strategyFast}, // base for negative levels - {19, 13, 14, 1, 7, 0, strategyFast}, // level 1 - {20, 15, 16, 1, 6, 0, strategyFast}, // level 2 - {21, 16, 17, 1, 5, 1, strategyDfast}, // level 3 - {21, 18, 18, 1, 5, 1, strategyDfast}, // level 4 - {21, 18, 19, 2, 5, 2, strategyGreedy}, // level 5 - {21, 19, 19, 3, 5, 4, strategyGreedy}, // level 6 - {21, 19, 19, 3, 5, 8, strategyLazy}, // level 7 - {21, 19, 19, 3, 5, 16, strategyLazy2}, // level 8 - {21, 19, 20, 4, 5, 16, strategyLazy2}, // level 9 - {22, 20, 21, 4, 5, 16, strategyLazy2}, // level 10 - {22, 21, 22, 4, 5, 16, strategyLazy2}, // level 11 - {22, 21, 22, 5, 5, 16, strategyLazy2}, // level 12 - {22, 21, 22, 5, 5, 32, strategyBtlazy2}, // level 13 - {22, 22, 23, 5, 5, 32, strategyBtlazy2}, // level 14 - {22, 23, 23, 6, 5, 32, strategyBtlazy2}, // level 15 - {22, 22, 22, 5, 5, 48, strategyBtopt}, // level 16 - {23, 23, 22, 5, 4, 64, strategyBtopt}, // level 17 - {23, 23, 22, 6, 3, 64, strategyBtultra}, // level 18 - {23, 24, 22, 7, 3, 256, strategyBtultra2}, // level 19 - {25, 25, 23, 7, 3, 256, strategyBtultra2}, // level 20 - {26, 26, 24, 7, 3, 512, strategyBtultra2}, // level 21 - {27, 27, 25, 9, 3, 999, strategyBtultra2}, // level 22 - }, - { // for srcSize <= 256 KB - // W, C, H, S, L, T, strat - {18, 12, 13, 1, 5, 1, strategyFast}, // base for negative levels - {18, 13, 14, 1, 6, 0, strategyFast}, // level 1 - {18, 14, 14, 1, 5, 1, strategyDfast}, // level 2 - {18, 16, 16, 1, 4, 1, strategyDfast}, // level 3 - {18, 16, 17, 2, 5, 2, strategyGreedy}, // level 4. - {18, 18, 18, 3, 5, 2, strategyGreedy}, // level 5. - {18, 18, 19, 3, 5, 4, strategyLazy}, // level 6. - {18, 18, 19, 4, 4, 4, strategyLazy}, // level 7 - {18, 18, 19, 4, 4, 8, strategyLazy2}, // level 8 - {18, 18, 19, 5, 4, 8, strategyLazy2}, // level 9 - {18, 18, 19, 6, 4, 8, strategyLazy2}, // level 10 - {18, 18, 19, 5, 4, 12, strategyBtlazy2}, // level 11. - {18, 19, 19, 7, 4, 12, strategyBtlazy2}, // level 12. - {18, 18, 19, 4, 4, 16, strategyBtopt}, // level 13 - {18, 18, 19, 4, 3, 32, strategyBtopt}, // level 14. - {18, 18, 19, 6, 3, 128, strategyBtopt}, // level 15. - {18, 19, 19, 6, 3, 128, strategyBtultra}, // level 16. - {18, 19, 19, 8, 3, 256, strategyBtultra}, // level 17. - {18, 19, 19, 6, 3, 128, strategyBtultra2}, // level 18. - {18, 19, 19, 8, 3, 256, strategyBtultra2}, // level 19. - {18, 19, 19, 10, 3, 512, strategyBtultra2}, // level 20. - {18, 19, 19, 12, 3, 512, strategyBtultra2}, // level 21. - {18, 19, 19, 13, 3, 999, strategyBtultra2}, // level 22. - }, - { // for srcSize <= 128 KB - // W, C, H, S, L, T, strat - {17, 12, 12, 1, 5, 1, strategyFast}, // base for negative levels - {17, 12, 13, 1, 6, 0, strategyFast}, // level 1 - {17, 13, 15, 1, 5, 0, strategyFast}, // level 2 - {17, 15, 16, 2, 5, 1, strategyDfast}, // level 3 - {17, 17, 17, 2, 4, 1, strategyDfast}, // level 4 - {17, 16, 17, 3, 4, 2, strategyGreedy}, // level 5 - {17, 17, 17, 3, 4, 4, strategyLazy}, // level 6 - {17, 17, 17, 3, 4, 8, strategyLazy2}, // level 7 - {17, 17, 17, 4, 4, 8, strategyLazy2}, // level 8 - {17, 17, 17, 5, 4, 8, strategyLazy2}, // level 9 - {17, 17, 17, 6, 4, 8, strategyLazy2}, // level 10 - {17, 17, 17, 5, 4, 8, strategyBtlazy2}, // level 11 - {17, 18, 17, 7, 4, 12, strategyBtlazy2}, // level 12 - {17, 18, 17, 3, 4, 12, strategyBtopt}, // level 13. - {17, 18, 17, 4, 3, 32, strategyBtopt}, // level 14. - {17, 18, 17, 6, 3, 256, strategyBtopt}, // level 15. - {17, 18, 17, 6, 3, 128, strategyBtultra}, // level 16. - {17, 18, 17, 8, 3, 256, strategyBtultra}, // level 17. - {17, 18, 17, 10, 3, 512, strategyBtultra}, // level 18. - {17, 18, 17, 5, 3, 256, strategyBtultra2}, // level 19. - {17, 18, 17, 7, 3, 512, strategyBtultra2}, // level 20. - {17, 18, 17, 9, 3, 512, strategyBtultra2}, // level 21. - {17, 18, 17, 11, 3, 999, strategyBtultra2}, // level 22. - }, - { // for srcSize <= 16 KB - // W, C, H, S, L, T, strat - {14, 12, 13, 1, 5, 1, strategyFast}, // base for negative levels - {14, 14, 15, 1, 5, 0, strategyFast}, // level 1 - {14, 14, 15, 1, 4, 0, strategyFast}, // level 2 - {14, 14, 15, 2, 4, 1, strategyDfast}, // level 3 - {14, 14, 14, 4, 4, 2, strategyGreedy}, // level 4 - {14, 14, 14, 3, 4, 4, strategyLazy}, // level 5. - {14, 14, 14, 4, 4, 8, strategyLazy2}, // level 6 - {14, 14, 14, 6, 4, 8, strategyLazy2}, // level 7 - {14, 14, 14, 8, 4, 8, strategyLazy2}, // level 8. - {14, 15, 14, 5, 4, 8, strategyBtlazy2}, // level 9. - {14, 15, 14, 9, 4, 8, strategyBtlazy2}, // level 10. - {14, 15, 14, 3, 4, 12, strategyBtopt}, // level 11. - {14, 15, 14, 4, 3, 24, strategyBtopt}, // level 12. - {14, 15, 14, 5, 3, 32, strategyBtultra}, // level 13. - {14, 15, 15, 6, 3, 64, strategyBtultra}, // level 14. - {14, 15, 15, 7, 3, 256, strategyBtultra}, // level 15. - {14, 15, 15, 5, 3, 48, strategyBtultra2}, // level 16. - {14, 15, 15, 6, 3, 128, strategyBtultra2}, // level 17. - {14, 15, 15, 7, 3, 256, strategyBtultra2}, // level 18. - {14, 15, 15, 8, 3, 256, strategyBtultra2}, // level 19. - {14, 15, 15, 8, 3, 512, strategyBtultra2}, // level 20. - {14, 15, 15, 9, 3, 512, strategyBtultra2}, // level 21. - {14, 15, 15, 10, 3, 999, strategyBtultra2}, // level 22. - }, -} -*/ diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index c56d224..dcc987a 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -33,9 +33,9 @@ type encoder interface { Block() *blockEnc CRC() *xxhash.Digest AppendCRC([]byte) []byte - WindowSize(size int) int32 + WindowSize(size int64) int32 UseBlock(*blockEnc) - Reset(singleBlock bool) + Reset(d *dict, singleBlock bool) } type encoderState struct { @@ -48,6 +48,8 @@ type encoderState struct { err error writeErr error nWritten int64 + nInput int64 + frameContentSize int64 headerWritten bool eofWritten bool fullFrameWritten bool @@ -83,8 +85,6 @@ func (e *Encoder) initialize() { e.encoders = make(chan encoder, e.o.concurrent) for i := 0; i < e.o.concurrent; i++ { enc := e.o.encoder() - // If not single block, history will be allocated on first use. - enc.Reset(true) e.encoders <- enc } } @@ -98,31 +98,47 @@ func (e *Encoder) Reset(w io.Writer) { if cap(s.filling) == 0 { s.filling = make([]byte, 0, e.o.blockSize) } - if cap(s.current) == 0 { - s.current = make([]byte, 0, e.o.blockSize) - } - if cap(s.previous) == 0 { - s.previous = make([]byte, 0, e.o.blockSize) + if e.o.concurrent > 1 { + if cap(s.current) == 0 { + s.current = make([]byte, 0, e.o.blockSize) + } + if cap(s.previous) == 0 { + s.previous = make([]byte, 0, e.o.blockSize) + } + s.current = s.current[:0] + s.previous = s.previous[:0] + if s.writing == nil { + s.writing = &blockEnc{lowMem: e.o.lowMem} + s.writing.init() + } + s.writing.initNewEncode() } if s.encoder == nil { s.encoder = e.o.encoder() } - if s.writing == nil { - s.writing = &blockEnc{} - s.writing.init() - } - s.writing.initNewEncode() s.filling = s.filling[:0] - s.current = s.current[:0] - s.previous = s.previous[:0] - s.encoder.Reset(false) + s.encoder.Reset(e.o.dict, false) s.headerWritten = false s.eofWritten = false s.fullFrameWritten = false s.w = w s.err = nil s.nWritten = 0 + s.nInput = 0 s.writeErr = nil + s.frameContentSize = 0 +} + +// ResetContentSize will reset and set a content size for the next stream. +// If the bytes written does not match the size given an error will be returned +// when calling Close(). +// This is removed when Reset is called. +// Sizes <= 0 results in no content size set. +func (e *Encoder) ResetContentSize(w io.Writer, size int64) { + e.Reset(w) + if size >= 0 { + e.state.frameContentSize = size + } } // Write data to the encoder. @@ -178,6 +194,12 @@ func (e *Encoder) nextBlock(final bool) error { } if !s.headerWritten { // If we have a single block encode, do a sync compression. + if final && len(s.filling) == 0 && !e.o.fullZero { + s.headerWritten = true + s.fullFrameWritten = true + s.eofWritten = true + return nil + } if final && len(s.filling) > 0 { s.current = e.EncodeAll(s.filling, s.current[:0]) var n2 int @@ -186,21 +208,24 @@ func (e *Encoder) nextBlock(final bool) error { return s.err } s.nWritten += int64(n2) + s.nInput += int64(len(s.filling)) s.current = s.current[:0] s.filling = s.filling[:0] s.headerWritten = true s.fullFrameWritten = true + s.eofWritten = true return nil } var tmp [maxHeaderSize]byte fh := frameHeader{ - ContentSize: 0, - WindowSize: uint32(s.encoder.WindowSize(0)), + ContentSize: uint64(s.frameContentSize), + WindowSize: uint32(s.encoder.WindowSize(s.frameContentSize)), SingleSegment: false, Checksum: e.o.crc, - DictID: 0, + DictID: e.o.dict.ID(), } + dst, err := fh.appendTo(tmp[:0]) if err != nil { return err @@ -235,11 +260,52 @@ func (e *Encoder) nextBlock(final bool) error { return s.err } + // SYNC: + if e.o.concurrent == 1 { + src := s.filling + s.nInput += int64(len(s.filling)) + if debugEncoder { + println("Adding sync block,", len(src), "bytes, final:", final) + } + enc := s.encoder + blk := enc.Block() + blk.reset(nil) + enc.Encode(blk, src) + blk.last = final + if final { + s.eofWritten = true + } + + err := errIncompressible + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + if len(src) != len(blk.literals) || len(src) != e.o.blockSize { + err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + } + switch err { + case errIncompressible: + if debugEncoder { + println("Storing incompressible block as raw") + } + blk.encodeRaw(src) + // In fast mode, we do not transfer offsets, so we don't have to deal with changing the. + case nil: + default: + s.err = err + return err + } + _, s.err = s.w.Write(blk.output) + s.nWritten += int64(len(blk.output)) + s.filling = s.filling[:0] + return s.err + } + // Move blocks forward. s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current + s.nInput += int64(len(s.current)) s.wg.Add(1) go func(src []byte) { - if debug { + if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) } defer func() { @@ -280,11 +346,11 @@ func (e *Encoder) nextBlock(final bool) error { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. if len(src) != len(blk.literals) || len(src) != e.o.blockSize { - err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) + err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) } switch err { case errIncompressible: - if debug { + if debugEncoder { println("Storing incompressible block as raw") } blk.encodeRaw(src) @@ -307,10 +373,16 @@ func (e *Encoder) nextBlock(final bool) error { // // The Copy function uses ReaderFrom if available. func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { - if debug { + if debugEncoder { println("Using ReadFrom") } - // Maybe handle stuff queued? + + // Flush any current writes. + if len(e.state.filling) > 0 { + if err := e.nextBlock(false); err != nil { + return 0, err + } + } e.state.filling = e.state.filling[:e.o.blockSize] src := e.state.filling for { @@ -324,20 +396,20 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { switch err { case io.EOF: e.state.filling = e.state.filling[:len(e.state.filling)-len(src)] - if debug { + if debugEncoder { println("ReadFrom: got EOF final block:", len(e.state.filling)) } - return n, e.nextBlock(true) + return n, nil + case nil: default: - if debug { + if debugEncoder { println("ReadFrom: got error:", err) } e.state.err = err return n, err - case nil: } if len(src) > 0 { - if debug { + if debugEncoder { println("ReadFrom: got space left in source:", len(src)) } continue @@ -382,6 +454,11 @@ func (e *Encoder) Close() error { if err != nil { return err } + if s.frameContentSize > 0 { + if s.nInput != s.frameContentSize { + return fmt.Errorf("frame content size %d given, but %d bytes was written", s.frameContentSize, s.nInput) + } + } if e.state.fullFrameWritten { return s.err } @@ -449,7 +526,6 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { defer func() { // Release encoder reference to last block. // If a non-single block is needed the encoder will reset again. - enc.Reset(true) e.encoders <- enc }() // Use single segments when above minimum window and below 1MB. @@ -459,14 +535,14 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } fh := frameHeader{ ContentSize: uint64(len(src)), - WindowSize: uint32(enc.WindowSize(len(src))), + WindowSize: uint32(enc.WindowSize(int64(len(src)))), SingleSegment: single, Checksum: e.o.crc, - DictID: 0, + DictID: e.o.dict.ID(), } // If less than 1MB, allocate a buffer up front. - if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 { + if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { dst = make([]byte, 0, len(src)) } dst, err := fh.appendTo(dst) @@ -476,13 +552,18 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If we can do everything in one block, prefer that. if len(src) <= maxCompressedBlockSize { + enc.Reset(e.o.dict, true) // Slightly faster with no history and everything in one block. if e.o.crc { _, _ = enc.CRC().Write(src) } blk := enc.Block() blk.last = true - enc.EncodeNoHist(blk, src) + if e.o.dict == nil { + enc.EncodeNoHist(blk, src) + } else { + enc.Encode(blk, src) + } // If we got the exact same number of literals as input, // assume the literals cannot be compressed. @@ -491,12 +572,12 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(blk.literals) != len(src) || len(src) != e.o.blockSize { // Output directly to dst blk.output = dst - err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) + err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) } switch err { case errIncompressible: - if debug { + if debugEncoder { println("Storing incompressible block as raw") } dst = blk.encodeRawTo(dst, src) @@ -507,7 +588,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } blk.output = oldout } else { - enc.Reset(false) + enc.Reset(e.o.dict, false) blk := enc.Block() for len(src) > 0 { todo := src @@ -518,7 +599,6 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if e.o.crc { _, _ = enc.CRC().Write(todo) } - blk.reset(nil) blk.pushOffsets() enc.Encode(blk, todo) if len(src) == 0 { @@ -528,12 +608,12 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { - err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) + err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy) } switch err { case errIncompressible: - if debug { + if debugEncoder { println("Storing incompressible block as raw") } dst = blk.encodeRawTo(dst, todo) @@ -543,6 +623,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { default: panic(err) } + blk.reset(nil) } } if e.o.crc { diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index dfac14d..44d8dbd 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -24,29 +24,45 @@ type encoderOptions struct { allLitEntropy bool customWindow bool customALEntropy bool + customBlockSize bool + lowMem bool + dict *dict } func (o *encoderOptions) setDefault() { *o = encoderOptions{ - // use less ram: true for now, but may change. - concurrent: runtime.GOMAXPROCS(0), - crc: true, - single: nil, - blockSize: 1 << 16, - windowSize: 8 << 20, - level: SpeedDefault, + concurrent: runtime.GOMAXPROCS(0), + crc: true, + single: nil, + blockSize: maxCompressedBlockSize, + windowSize: 8 << 20, + level: SpeedDefault, + allLitEntropy: true, + lowMem: false, } } // encoder returns an encoder with the selected options. func (o encoderOptions) encoder() encoder { switch o.level { + case SpeedFastest: + if o.dict != nil { + return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + } + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + case SpeedDefault: - return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} + if o.dict != nil { + return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}} + } + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} case SpeedBetterCompression: - return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} - case SpeedFastest: - return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} + if o.dict != nil { + return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + } + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + case SpeedBestCompression: + return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} } panic("unknown compression level") } @@ -58,8 +74,9 @@ func WithEncoderCRC(b bool) EOption { } // WithEncoderConcurrency will set the concurrency, -// meaning the maximum number of decoders to run concurrently. +// meaning the maximum number of encoders to run concurrently. // The value supplied must be at least 1. +// For streams, setting a value of 1 will disable async compression. // By default this will be set to GOMAXPROCS. func WithEncoderConcurrency(n int) EOption { return func(o *encoderOptions) error { @@ -91,6 +108,7 @@ func WithWindowSize(n int) EOption { o.customWindow = true if o.blockSize > o.windowSize { o.blockSize = o.windowSize + o.customBlockSize = true } return nil } @@ -141,20 +159,20 @@ const ( // By using this, notice that CPU usage may go up in the future. SpeedBetterCompression + // SpeedBestCompression will choose the best available compression option. + // This will offer the best compression no matter the CPU cost. + SpeedBestCompression + // speedLast should be kept as the last actual compression option. // The is not for external usage, but is used to keep track of the valid options. speedLast - - // SpeedBestCompression will choose the best available compression option. - // For now this is not implemented. - SpeedBestCompression = SpeedBetterCompression ) // EncoderLevelFromString will convert a string representation of an encoding level back // to a compression level. The compare is not case sensitive. // If the string wasn't recognized, (false, SpeedDefault) will be returned. func EncoderLevelFromString(s string) (bool, EncoderLevel) { - for l := EncoderLevel(speedNotSet + 1); l < speedLast; l++ { + for l := speedNotSet + 1; l < speedLast; l++ { if strings.EqualFold(s, l.String()) { return true, l } @@ -171,10 +189,11 @@ func EncoderLevelFromZstd(level int) EncoderLevel { return SpeedFastest case level >= 3 && level < 6: return SpeedDefault - case level > 5: + case level >= 6 && level < 10: return SpeedBetterCompression + default: + return SpeedBestCompression } - return SpeedDefault } // String provides a string representation of the compression level. @@ -186,6 +205,8 @@ func (e EncoderLevel) String() string { return "default" case SpeedBetterCompression: return "better" + case SpeedBestCompression: + return "best" default: return "invalid" } @@ -203,10 +224,15 @@ func WithEncoderLevel(l EncoderLevel) EOption { switch o.level { case SpeedFastest: o.windowSize = 4 << 20 + if !o.customBlockSize { + o.blockSize = 1 << 16 + } case SpeedDefault: o.windowSize = 8 << 20 case SpeedBetterCompression: o.windowSize = 16 << 20 + case SpeedBestCompression: + o.windowSize = 32 << 20 } } if !o.customALEntropy { @@ -265,3 +291,27 @@ func WithSingleSegment(b bool) EOption { return nil } } + +// WithLowerEncoderMem will trade in some memory cases trade less memory usage for +// slower encoding speed. +// This will not change the window size which is the primary function for reducing +// memory usage. See WithWindowSize. +func WithLowerEncoderMem(b bool) EOption { + return func(o *encoderOptions) error { + o.lowMem = b + return nil + } +} + +// WithEncoderDict allows to register a dictionary that will be used for the encode. +// The encoder *may* choose to use no dictionary instead for certain payloads. +func WithEncoderDict(dict []byte) EOption { + return func(o *encoderOptions) error { + d, err := loadDict(dict) + if err != nil { + return err + } + o.dict = d + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index fc4a566..3ff109c 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -8,27 +8,17 @@ import ( "bytes" "encoding/hex" "errors" - "hash" "io" - "sync" "github.com/klauspost/compress/zstd/internal/xxhash" ) type frameDec struct { - o decoderOptions - crc hash.Hash64 - offset int64 + o decoderOptions + crc *xxhash.Digest WindowSize uint64 - // maxWindowSize is the maximum windows size to support. - // should never be bigger than max-int. - maxWindowSize uint64 - - // In order queue of blocks being decoded. - decoding chan *blockDec - // Frame history passed between blocks history history @@ -38,20 +28,18 @@ type frameDec struct { bBuf byteBuf FrameContentSize uint64 - frameDone sync.WaitGroup DictionaryID *uint32 HasCheckSum bool SingleSegment bool - - // asyncRunning indicates whether the async routine processes input on 'decoding'. - asyncRunningMu sync.Mutex - asyncRunning bool } const ( - // The minimum Window_Size is 1 KB. + // MinWindowSize is the minimum Window Size, which is 1 KB. MinWindowSize = 1 << 10 + + // MaxWindowSize is the maximum encoder window size + // and the default decoder maximum window size. MaxWindowSize = 1 << 29 ) @@ -61,12 +49,11 @@ var ( ) func newFrameDec(o decoderOptions) *frameDec { - d := frameDec{ - o: o, - maxWindowSize: MaxWindowSize, + if o.maxWindowSize > o.maxDecodedSize { + o.maxWindowSize = o.maxDecodedSize } - if d.maxWindowSize > o.maxDecodedSize { - d.maxWindowSize = o.maxDecodedSize + d := frameDec{ + o: o, } return &d } @@ -78,50 +65,74 @@ func newFrameDec(o decoderOptions) *frameDec { func (d *frameDec) reset(br byteBuffer) error { d.HasCheckSum = false d.WindowSize = 0 - var b []byte + var signature [4]byte for { - b = br.readSmall(4) - if b == nil { + var err error + // Check if we can read more... + b, err := br.readSmall(1) + switch err { + case io.EOF, io.ErrUnexpectedEOF: + return io.EOF + default: + return err + case nil: + signature[0] = b[0] + } + // Read the rest, don't allow io.ErrUnexpectedEOF + b, err = br.readSmall(3) + switch err { + case io.EOF: return io.EOF + default: + return err + case nil: + copy(signature[1:], b) } - if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { - if debug { - println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic)) + + if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 { + if debugDecoder { + println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic)) } // Break if not skippable frame. break } // Read size to skip - b = br.readSmall(4) - if b == nil { - println("Reading Frame Size EOF") - return io.ErrUnexpectedEOF + b, err = br.readSmall(4) + if err != nil { + if debugDecoder { + println("Reading Frame Size", err) + } + return err } n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) println("Skipping frame with", n, "bytes.") - err := br.skipN(int(n)) + err = br.skipN(int(n)) if err != nil { - if debug { + if debugDecoder { println("Reading discarded frame", err) } return err } } - if !bytes.Equal(b, frameMagic) { - println("Got magic numbers: ", b, "want:", frameMagic) + if !bytes.Equal(signature[:], frameMagic) { + if debugDecoder { + println("Got magic numbers: ", signature, "want:", frameMagic) + } return ErrMagicMismatch } // Read Frame_Header_Descriptor fhd, err := br.readByte() if err != nil { - println("Reading Frame_Header_Descriptor", err) + if debugDecoder { + println("Reading Frame_Header_Descriptor", err) + } return err } d.SingleSegment = fhd&(1<<5) != 0 if fhd&(1<<3) != 0 { - return errors.New("Reserved bit set on frame header") + return errors.New("reserved bit set on frame header") } // Read Window_Descriptor @@ -130,7 +141,9 @@ func (d *frameDec) reset(br byteBuffer) error { if !d.SingleSegment { wd, err := br.readByte() if err != nil { - println("Reading Window_Descriptor", err) + if debugDecoder { + println("Reading Window_Descriptor", err) + } return err } printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) @@ -147,12 +160,11 @@ func (d *frameDec) reset(br byteBuffer) error { if size == 3 { size = 4 } - b = br.readSmall(int(size)) - if b == nil { - if debug { - println("Reading Dictionary_ID", io.ErrUnexpectedEOF) - } - return io.ErrUnexpectedEOF + + b, err := br.readSmall(int(size)) + if err != nil { + println("Reading Dictionary_ID", err) + return err } var id uint32 switch size { @@ -163,7 +175,7 @@ func (d *frameDec) reset(br byteBuffer) error { case 4: id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) } - if debug { + if debugDecoder { println("Dict size", size, "ID:", id) } if id > 0 { @@ -185,12 +197,12 @@ func (d *frameDec) reset(br byteBuffer) error { default: fcsSize = 1 << v } - d.FrameContentSize = 0 + d.FrameContentSize = fcsUnknown if fcsSize > 0 { - b := br.readSmall(fcsSize) - if b == nil { - println("Reading Frame content", io.ErrUnexpectedEOF) - return io.ErrUnexpectedEOF + b, err := br.readSmall(fcsSize) + if err != nil { + println("Reading Frame content", err) + return err } switch fcsSize { case 1: @@ -205,10 +217,11 @@ func (d *frameDec) reset(br byteBuffer) error { d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24) d.FrameContentSize = uint64(d1) | (uint64(d2) << 32) } - if debug { - println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize) + if debugDecoder { + println("Read FCS:", d.FrameContentSize) } } + // Move this to shared. d.HasCheckSum = fhd&(1<<2) != 0 if d.HasCheckSum { @@ -226,21 +239,31 @@ func (d *frameDec) reset(br byteBuffer) error { } } - if d.WindowSize > d.maxWindowSize { - printf("window size %d > max %d\n", d.WindowSize, d.maxWindowSize) + if d.WindowSize > uint64(d.o.maxWindowSize) { + if debugDecoder { + printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) + } return ErrWindowSizeExceeded } // The minimum Window_Size is 1 KB. if d.WindowSize < MinWindowSize { - println("got window size: ", d.WindowSize) + if debugDecoder { + println("got window size: ", d.WindowSize) + } return ErrWindowSizeTooSmall } d.history.windowSize = int(d.WindowSize) if d.o.lowMem && d.history.windowSize < maxBlockSize { - d.history.maxSize = d.history.windowSize * 2 + d.history.allocFrameBuffer = d.history.windowSize * 2 + // TODO: Maybe use FrameContent size } else { - d.history.maxSize = d.history.windowSize + maxBlockSize + d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize } + + if debugDecoder { + println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum) + } + // history contains input - maybe we do something d.rawInput = br return nil @@ -248,56 +271,37 @@ func (d *frameDec) reset(br byteBuffer) error { // next will start decoding the next block from stream. func (d *frameDec) next(block *blockDec) error { - if debug { - printf("decoding new block %p:%p", block, block.data) + if debugDecoder { + println("decoding new block") } err := block.reset(d.rawInput, d.WindowSize) if err != nil { println("block error:", err) // Signal the frame decoder we have a problem. - d.sendErr(block, err) + block.sendErr(err) return err } - block.input <- struct{}{} - if debug { - println("next block:", block) - } - d.asyncRunningMu.Lock() - defer d.asyncRunningMu.Unlock() - if !d.asyncRunning { - return nil - } - if block.Last { - // We indicate the frame is done by sending io.EOF - d.decoding <- block - return io.EOF - } - d.decoding <- block return nil } -// sendEOF will queue an error block on the frame. -// This will cause the frame decoder to return when it encounters the block. -// Returns true if the decoder was added. -func (d *frameDec) sendErr(block *blockDec, err error) bool { - d.asyncRunningMu.Lock() - defer d.asyncRunningMu.Unlock() - if !d.asyncRunning { - return false - } - - println("sending error", err.Error()) - block.sendErr(err) - d.decoding <- block - return true -} - // checkCRC will check the checksum if the frame has one. // Will return ErrCRCMismatch if crc check failed, otherwise nil. func (d *frameDec) checkCRC() error { if !d.HasCheckSum { return nil } + + // We can overwrite upper tmp now + want, err := d.rawInput.readSmall(4) + if err != nil { + println("CRC missing?", err) + return err + } + + if d.o.ignoreChecksum { + return nil + } + var tmp [4]byte got := d.crc.Sum64() // Flip to match file order. @@ -306,142 +310,29 @@ func (d *frameDec) checkCRC() error { tmp[2] = byte(got >> 16) tmp[3] = byte(got >> 24) - // We can overwrite upper tmp now - want := d.rawInput.readSmall(4) - if want == nil { - println("CRC missing?") - return io.ErrUnexpectedEOF - } - if !bytes.Equal(tmp[:], want) { - if debug { + if debugDecoder { println("CRC Check Failed:", tmp[:], "!=", want) } return ErrCRCMismatch } - if debug { + if debugDecoder { println("CRC ok", tmp[:]) } return nil } -func (d *frameDec) initAsync() { - if !d.o.lowMem && !d.SingleSegment { - // set max extra size history to 10MB. - d.history.maxSize = d.history.windowSize + maxBlockSize*5 - } - // re-alloc if more than one extra block size. - if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize { - d.history.b = make([]byte, 0, d.history.maxSize) - } - if cap(d.history.b) < d.history.maxSize { - d.history.b = make([]byte, 0, d.history.maxSize) - } - if cap(d.decoding) < d.o.concurrent { - d.decoding = make(chan *blockDec, d.o.concurrent) - } - if debug { - h := d.history - printf("history init. len: %d, cap: %d", len(h.b), cap(h.b)) - } - d.asyncRunningMu.Lock() - d.asyncRunning = true - d.asyncRunningMu.Unlock() -} - -// startDecoder will start decoding blocks and write them to the writer. -// The decoder will stop as soon as an error occurs or at end of frame. -// When the frame has finished decoding the *bufio.Reader -// containing the remaining input will be sent on frameDec.frameDone. -func (d *frameDec) startDecoder(output chan decodeOutput) { - written := int64(0) - - defer func() { - d.asyncRunningMu.Lock() - d.asyncRunning = false - d.asyncRunningMu.Unlock() - - // Drain the currently decoding. - d.history.error = true - flushdone: - for { - select { - case b := <-d.decoding: - b.history <- &d.history - output <- <-b.result - default: - break flushdone - } - } - println("frame decoder done, signalling done") - d.frameDone.Done() - }() - // Get decoder for first block. - block := <-d.decoding - block.history <- &d.history - for { - var next *blockDec - // Get result - r := <-block.result - if r.err != nil { - println("Result contained error", r.err) - output <- r - return - } - if debug { - println("got result, from ", d.offset, "to", d.offset+int64(len(r.b))) - d.offset += int64(len(r.b)) - } - if !block.Last { - // Send history to next block - select { - case next = <-d.decoding: - if debug { - println("Sending ", len(d.history.b), "bytes as history") - } - next.history <- &d.history - default: - // Wait until we have sent the block, so - // other decoders can potentially get the decoder. - next = nil - } - } - - // Add checksum, async to decoding. - if d.HasCheckSum { - n, err := d.crc.Write(r.b) - if err != nil { - r.err = err - if n != len(r.b) { - r.err = io.ErrShortWrite - } - output <- r - return - } - } - written += int64(len(r.b)) - if d.SingleSegment && uint64(written) > d.FrameContentSize { - println("runDecoder: single segment and", uint64(written), ">", d.FrameContentSize) - r.err = ErrFrameSizeExceeded - output <- r - return - } - if block.Last { - r.err = d.checkCRC() - output <- r - return - } - output <- r - if next == nil { - // There was no decoder available, we wait for one now that we have sent to the writer. - if debug { - println("Sending ", len(d.history.b), " bytes as history") - } - next = <-d.decoding - next.history <- &d.history +// consumeCRC reads the checksum data if the frame has one. +func (d *frameDec) consumeCRC() error { + if d.HasCheckSum { + _, err := d.rawInput.readSmall(4) + if err != nil { + println("CRC missing?", err) + return err } - block = next } + + return nil } // runDecoder will create a sync decoder that will decode a block of data. @@ -450,41 +341,67 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { // We use the history for output to avoid copying it. d.history.b = dst + d.history.ignoreBuffer = len(dst) // Store input length, so we only check new data. crcStart := len(dst) + d.history.decoders.maxSyncLen = 0 + if d.FrameContentSize != fcsUnknown { + d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst)) + if d.history.decoders.maxSyncLen > d.o.maxDecodedSize { + return dst, ErrDecoderSizeExceeded + } + if uint64(cap(dst)) < d.history.decoders.maxSyncLen { + // Alloc for output + dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc) + copy(dst2, dst) + dst = dst2 + } + } var err error for { err = dec.reset(d.rawInput, d.WindowSize) if err != nil { break } - if debug { + if debugDecoder { println("next block:", dec) } err = dec.decodeBuf(&d.history) - if err != nil || dec.Last { + if err != nil { break } if uint64(len(d.history.b)) > d.o.maxDecodedSize { err = ErrDecoderSizeExceeded break } - if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize { - println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize) + if uint64(len(d.history.b)-crcStart) > d.FrameContentSize { + println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize) err = ErrFrameSizeExceeded break } + if dec.Last { + break + } + if debugDecoder { + println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize) + } } dst = d.history.b if err == nil { - if d.HasCheckSum { - var n int - n, err = d.crc.Write(dst[crcStart:]) - if err == nil { - if n != len(dst)-crcStart { - err = io.ErrShortWrite - } else { - err = d.checkCRC() + if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize { + err = ErrFrameSizeMismatch + } else if d.HasCheckSum { + if d.o.ignoreChecksum { + err = d.consumeCRC() + } else { + var n int + n, err = d.crc.Write(dst[crcStart:]) + if err == nil { + if n != len(dst)-crcStart { + err = io.ErrShortWrite + } else { + err = d.checkCRC() + } } } } diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go index 4479cfe..4ef7f5a 100644 --- a/vendor/github.com/klauspost/compress/zstd/frameenc.go +++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go @@ -5,6 +5,7 @@ package zstd import ( + "encoding/binary" "fmt" "io" "math" @@ -16,7 +17,7 @@ type frameHeader struct { WindowSize uint32 SingleSegment bool Checksum bool - DictID uint32 // Not stored. + DictID uint32 } const maxHeaderSize = 14 @@ -30,6 +31,24 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { if f.SingleSegment { fhd |= 1 << 5 } + + var dictIDContent []byte + if f.DictID > 0 { + var tmp [4]byte + if f.DictID < 256 { + fhd |= 1 + tmp[0] = uint8(f.DictID) + dictIDContent = tmp[:1] + } else if f.DictID < 1<<16 { + fhd |= 2 + binary.LittleEndian.PutUint16(tmp[:2], uint16(f.DictID)) + dictIDContent = tmp[:2] + } else { + fhd |= 3 + binary.LittleEndian.PutUint32(tmp[:4], f.DictID) + dictIDContent = tmp[:4] + } + } var fcs uint8 if f.ContentSize >= 256 { fcs++ @@ -40,6 +59,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { if f.ContentSize >= 0xffffffff { fcs++ } + fhd |= fcs << 6 dst = append(dst, fhd) @@ -48,7 +68,9 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { windowLog := (bits.Len32(f.WindowSize-1) - winLogMin) << 3 dst = append(dst, uint8(windowLog)) } - + if f.DictID > 0 { + dst = append(dst, dictIDContent...) + } switch fcs { case 0: if f.SingleSegment { diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index e6d3d49..fde4e6b 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -5,8 +5,10 @@ package zstd import ( + "encoding/binary" "errors" "fmt" + "io" ) const ( @@ -182,6 +184,29 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { return s.buildDtable() } +func (s *fseDecoder) mustReadFrom(r io.Reader) { + fatalErr := func(err error) { + if err != nil { + panic(err) + } + } + // dt [maxTablesize]decSymbol // Decompression table. + // symbolLen uint16 // Length of active part of the symbol table. + // actualTableLog uint8 // Selected tablelog. + // maxBits uint8 // Maximum number of additional bits + // // used for table creation to avoid allocations. + // stateTable [256]uint16 + // norm [maxSymbolValue + 1]int16 + // preDefined bool + fatalErr(binary.Read(r, binary.LittleEndian, &s.dt)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.norm)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined)) +} + // decSymbol contains information about a state entry, // Including the state offset base, the output symbol and // the number of bits to read for the low part of the destination state. @@ -379,7 +404,7 @@ func (s decSymbol) final() (int, uint8) { // This can only be used if no symbols are 0 bits. // At least tablelog bits must be available in the bit reader. func (s *fseState) nextFast(br *bitReader) (uint32, uint8) { - lowBits := uint16(br.getBitsFast(s.state.nbBits())) + lowBits := br.get16BitsFast(s.state.nbBits()) s.state = s.dt[s.state.newState()+lowBits] return s.state.baseline(), s.state.addBits() } diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index aa9eba8..5442061 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -62,9 +62,8 @@ func (s symbolTransform) String() string { // To indicate that you have populated the histogram call HistogramFinished // with the value of the highest populated symbol, as well as the number of entries // in the most populated entry. These are accepted at face value. -// The returned slice will always be length 256. -func (s *fseEncoder) Histogram() []uint32 { - return s.count[:] +func (s *fseEncoder) Histogram() *[256]uint32 { + return &s.count } // HistogramFinished can be called to indicate that the histogram has been populated. @@ -97,7 +96,7 @@ func (s *fseEncoder) prepare() (*fseEncoder, error) { func (s *fseEncoder) allocCtable() { tableSize := 1 << s.actualTableLog // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < int(tableSize) { + if cap(s.ct.tableSymbol) < tableSize { s.ct.tableSymbol = make([]byte, tableSize) } s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] @@ -202,13 +201,13 @@ func (s *fseEncoder) buildCTable() error { case 0: case -1, 1: symbolTT[i].deltaNbBits = tl - symbolTT[i].deltaFindState = int16(total - 1) + symbolTT[i].deltaFindState = total - 1 total++ default: maxBitsOut := uint32(tableLog) - highBit(uint32(v-1)) minStatePlus := uint32(v) << maxBitsOut symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus - symbolTT[i].deltaFindState = int16(total - v) + symbolTT[i].deltaFindState = total - v total += v } } @@ -229,7 +228,7 @@ func (s *fseEncoder) setRLE(val byte) { deltaFindState: 0, deltaNbBits: 0, } - if debug { + if debugEncoder { println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val]) } s.rleVal = val @@ -353,8 +352,8 @@ func (s *fseEncoder) normalizeCount2(length int) error { distributed uint32 total = uint32(length) tableLog = s.actualTableLog - lowThreshold = uint32(total >> tableLog) - lowOne = uint32((total * 3) >> (tableLog + 1)) + lowThreshold = total >> tableLog + lowOne = (total * 3) >> (tableLog + 1) ) for i, cnt := range s.count[:s.symbolLen] { if cnt == 0 { @@ -379,7 +378,7 @@ func (s *fseEncoder) normalizeCount2(length int) error { if (total / toDistribute) > lowOne { // risk of rounding to zero - lowOne = uint32((total * 3) / (toDistribute * 2)) + lowOne = (total * 3) / (toDistribute * 2) for i, cnt := range s.count[:s.symbolLen] { if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { s.norm[i] = 1 @@ -708,7 +707,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + int32(first.deltaFindState) c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go index 6c17dc1..474cb77 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go @@ -59,7 +59,7 @@ func fillBase(dst []baseOffset, base uint32, bits ...uint8) { } for i, bit := range bits { if base > math.MaxInt32 { - panic(fmt.Sprintf("invalid decoding table, base overflows int32")) + panic("invalid decoding table, base overflows int32") } dst[i] = baseOffset{ diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go index 4a75206..cf33f29 100644 --- a/vendor/github.com/klauspost/compress/zstd/hash.go +++ b/vendor/github.com/klauspost/compress/zstd/hash.go @@ -13,24 +13,24 @@ const ( prime8bytes = 0xcf1bbcdcb7a56463 ) -// hashLen returns a hash of the lowest l bytes of u for a size size of h bytes. -// l must be >=4 and <=8. Any other value will return hash for 4 bytes. -// h should always be <32. -// Preferably h and l should be a constant. -// FIXME: This does NOT get resolved, if 'mls' is constant, -// so this cannot be used. -func hashLen(u uint64, hashLog, mls uint8) uint32 { +// hashLen returns a hash of the lowest mls bytes of with length output bits. +// mls must be >=3 and <=8. Any other value will return hash for 4 bytes. +// length should always be < 32. +// Preferably length and mls should be a constant for inlining. +func hashLen(u uint64, length, mls uint8) uint32 { switch mls { + case 3: + return (uint32(u<<8) * prime3bytes) >> (32 - length) case 5: - return hash5(u, hashLog) + return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length)) case 6: - return hash6(u, hashLog) + return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length)) case 7: - return hash7(u, hashLog) + return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length)) case 8: - return hash8(u, hashLog) + return uint32((u * prime8bytes) >> (64 - length)) default: - return hash4x64(u, hashLog) + return (uint32(u) * prime4bytes) >> (32 - length) } } @@ -39,39 +39,3 @@ func hashLen(u uint64, hashLog, mls uint8) uint32 { func hash3(u uint32, h uint8) uint32 { return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31) } - -// hash4 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4(u uint32, h uint8) uint32 { - return (u * prime4bytes) >> ((32 - h) & 31) -} - -// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4x64(u uint64, h uint8) uint32 { - return (uint32(u) * prime4bytes) >> ((32 - h) & 31) -} - -// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash5(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63)) -} - -// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash6(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) -} - -// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash7(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) -} - -// hash8 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash8(u uint64, h uint8) uint32 { - return uint32((u * prime8bytes) >> ((64 - h) & 63)) -} diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go index f418f50..28b4015 100644 --- a/vendor/github.com/klauspost/compress/zstd/history.go +++ b/vendor/github.com/klauspost/compress/zstd/history.go @@ -10,20 +10,31 @@ import ( // history contains the information transferred between blocks. type history struct { - b []byte - huffTree *huff0.Scratch - recentOffsets [3]int + // Literal decompression + huffTree *huff0.Scratch + + // Sequence decompression decoders sequenceDecs - windowSize int - maxSize int - error bool - dict *dict + recentOffsets [3]int + + // History buffer... + b []byte + + // ignoreBuffer is meant to ignore a number of bytes + // when checking for matches in history + ignoreBuffer int + + windowSize int + allocFrameBuffer int // needed? + error bool + dict *dict } // reset will reset the history to initial state of a frame. // The history must already have been initialized to the desired size. func (h *history) reset() { h.b = h.b[:0] + h.ignoreBuffer = 0 h.error = false h.recentOffsets = [3]int{1, 4, 8} if f := h.decoders.litLengths.fse; f != nil && !f.preDefined { @@ -35,9 +46,9 @@ func (h *history) reset() { if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined { fseDecoderPool.Put(f) } - h.decoders = sequenceDecs{} + h.decoders = sequenceDecs{br: h.decoders.br} if h.huffTree != nil { - if h.dict == nil || h.dict.litDec != h.huffTree { + if h.dict == nil || h.dict.litEnc != h.huffTree { huffDecoderPool.Put(h.huffTree) } } @@ -54,8 +65,9 @@ func (h *history) setDict(dict *dict) { h.decoders.litLengths = dict.llDec h.decoders.offsets = dict.ofDec h.decoders.matchLengths = dict.mlDec + h.decoders.dict = dict.content h.recentOffsets = dict.offsets - h.huffTree = dict.litDec + h.huffTree = dict.litEnc } // append bytes to history. @@ -83,6 +95,24 @@ func (h *history) append(b []byte) { copy(h.b[h.windowSize-len(b):], b) } +// ensureBlock will ensure there is space for at least one block... +func (h *history) ensureBlock() { + if cap(h.b) < h.allocFrameBuffer { + h.b = make([]byte, 0, h.allocFrameBuffer) + return + } + + avail := cap(h.b) - len(h.b) + if avail >= h.windowSize || avail > maxCompressedBlockSize { + return + } + // Move data down so we only have window size left. + // We know we have less than window size in b at this point. + discard := len(h.b) - h.windowSize + copy(h.b, h.b[discard:]) + h.b = h.b[:h.windowSize] +} + // append bytes to history without ever discarding anything. func (h *history) appendKeep(b []byte) { h.b = append(h.b, b...) diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go index 426b9ca..2c112a0 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go @@ -195,7 +195,6 @@ func (d *Digest) UnmarshalBinary(b []byte) error { b, d.v4 = consumeUint64(b) b, d.total = consumeUint64(b) copy(d.mem[:], b) - b = b[len(d.mem):] d.n = int(d.total % uint64(len(d.mem))) return nil } diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s index 2c9c535..cea1785 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s @@ -1,12 +1,13 @@ // +build !appengine // +build gc // +build !purego +// +build !noasm #include "textflag.h" // Register allocation: // AX h -// CX pointer to advance through b +// SI pointer to advance through b // DX n // BX loop end // R8 v1, k1 @@ -16,39 +17,39 @@ // R12 tmp // R13 prime1v // R14 prime2v -// R15 prime4v +// DI prime4v -// round reads from and advances the buffer pointer in CX. +// round reads from and advances the buffer pointer in SI. // It assumes that R13 has prime1v and R14 has prime2v. #define round(r) \ - MOVQ (CX), R12 \ - ADDQ $8, CX \ + MOVQ (SI), R12 \ + ADDQ $8, SI \ IMULQ R14, R12 \ ADDQ R12, r \ ROLQ $31, r \ IMULQ R13, r // mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. +// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v. #define mergeRound(acc, val) \ IMULQ R14, val \ ROLQ $31, val \ IMULQ R13, val \ XORQ val, acc \ IMULQ R13, acc \ - ADDQ R15, acc + ADDQ DI, acc // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOSPLIT, $0-32 // Load fixed primes. MOVQ ·prime1v(SB), R13 MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), R15 + MOVQ ·prime4v(SB), DI // Load slice. - MOVQ b_base+0(FP), CX + MOVQ b_base+0(FP), SI MOVQ b_len+8(FP), DX - LEAQ (CX)(DX*1), BX + LEAQ (SI)(DX*1), BX // The first loop limit will be len(b)-32. SUBQ $32, BX @@ -65,14 +66,14 @@ TEXT ·Sum64(SB), NOSPLIT, $0-32 XORQ R11, R11 SUBQ R13, R11 - // Loop until CX > BX. + // Loop until SI > BX. blockLoop: round(R8) round(R9) round(R10) round(R11) - CMPQ CX, BX + CMPQ SI, BX JLE blockLoop MOVQ R8, AX @@ -100,16 +101,16 @@ noBlocks: afterBlocks: ADDQ DX, AX - // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. + // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. ADDQ $24, BX - CMPQ CX, BX + CMPQ SI, BX JG fourByte wordLoop: // Calculate k1. - MOVQ (CX), R8 - ADDQ $8, CX + MOVQ (SI), R8 + ADDQ $8, SI IMULQ R14, R8 ROLQ $31, R8 IMULQ R13, R8 @@ -117,18 +118,18 @@ wordLoop: XORQ R8, AX ROLQ $27, AX IMULQ R13, AX - ADDQ R15, AX + ADDQ DI, AX - CMPQ CX, BX + CMPQ SI, BX JLE wordLoop fourByte: ADDQ $4, BX - CMPQ CX, BX + CMPQ SI, BX JG singles - MOVL (CX), R8 - ADDQ $4, CX + MOVL (SI), R8 + ADDQ $4, SI IMULQ R13, R8 XORQ R8, AX @@ -138,19 +139,19 @@ fourByte: singles: ADDQ $4, BX - CMPQ CX, BX + CMPQ SI, BX JGE finalize singlesLoop: - MOVBQZX (CX), R12 - ADDQ $1, CX + MOVBQZX (SI), R12 + ADDQ $1, SI IMULQ ·prime5v(SB), R12 XORQ R12, AX ROLQ $11, AX IMULQ R13, AX - CMPQ CX, BX + CMPQ SI, BX JL singlesLoop finalize: @@ -179,13 +180,13 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40 MOVQ ·prime2v(SB), R14 // Load slice. - MOVQ arg1_base+8(FP), CX - MOVQ arg1_len+16(FP), DX - LEAQ (CX)(DX*1), BX + MOVQ b_base+8(FP), SI + MOVQ b_len+16(FP), DX + LEAQ (SI)(DX*1), BX SUBQ $32, BX // Load vN from d. - MOVQ arg+0(FP), AX + MOVQ d+0(FP), AX MOVQ 0(AX), R8 // v1 MOVQ 8(AX), R9 // v2 MOVQ 16(AX), R10 // v3 @@ -199,7 +200,7 @@ blockLoop: round(R10) round(R11) - CMPQ CX, BX + CMPQ SI, BX JLE blockLoop // Copy vN back to d. @@ -208,8 +209,8 @@ blockLoop: MOVQ R10, 16(AX) MOVQ R11, 24(AX) - // The number of bytes written is CX minus the old base pointer. - SUBQ arg1_base+8(FP), CX - MOVQ CX, ret+32(FP) + // The number of bytes written is SI minus the old base pointer. + SUBQ b_base+8(FP), SI + MOVQ SI, ret+32(FP) RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s new file mode 100644 index 0000000..4d64a17 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -0,0 +1,186 @@ +// +build gc,!purego,!noasm + +#include "textflag.h" + +// Register allocation. +#define digest R1 +#define h R2 // Return value. +#define p R3 // Input pointer. +#define len R4 +#define nblocks R5 // len / 32. +#define prime1 R7 +#define prime2 R8 +#define prime3 R9 +#define prime4 R10 +#define prime5 R11 +#define v1 R12 +#define v2 R13 +#define v3 R14 +#define v4 R15 +#define x1 R20 +#define x2 R21 +#define x3 R22 +#define x4 R23 + +#define round(acc, x) \ + MADD prime2, acc, x, acc \ + ROR $64-31, acc \ + MUL prime1, acc \ + +// x = round(0, x). +#define round0(x) \ + MUL prime2, x \ + ROR $64-31, x \ + MUL prime1, x \ + +#define mergeRound(x) \ + round0(x) \ + EOR x, h \ + MADD h, prime4, prime1, h \ + +// Update v[1-4] with 32-byte blocks. Assumes len >= 32. +#define blocksLoop() \ + LSR $5, len, nblocks \ + PCALIGN $16 \ + loop: \ + LDP.P 32(p), (x1, x2) \ + round(v1, x1) \ + LDP -16(p), (x3, x4) \ + round(v2, x2) \ + SUB $1, nblocks \ + round(v3, x3) \ + round(v4, x4) \ + CBNZ nblocks, loop \ + +// The primes are repeated here to ensure that they're stored +// in a contiguous array, so we can load them with LDP. +DATA primes<> +0(SB)/8, $11400714785074694791 +DATA primes<> +8(SB)/8, $14029467366897019727 +DATA primes<>+16(SB)/8, $1609587929392839161 +DATA primes<>+24(SB)/8, $9650029242287828579 +DATA primes<>+32(SB)/8, $2870177450012600261 +GLOBL primes<>(SB), NOPTR+RODATA, $40 + +// func Sum64(b []byte) uint64 +TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32 + LDP b_base+0(FP), (p, len) + + LDP primes<> +0(SB), (prime1, prime2) + LDP primes<>+16(SB), (prime3, prime4) + MOVD primes<>+32(SB), prime5 + + CMP $32, len + CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 } + BLO afterLoop + + ADD prime1, prime2, v1 + MOVD prime2, v2 + MOVD $0, v3 + NEG prime1, v4 + + blocksLoop() + + ROR $64-1, v1, x1 + ROR $64-7, v2, x2 + ADD x1, x2 + ROR $64-12, v3, x3 + ROR $64-18, v4, x4 + ADD x3, x4 + ADD x2, x4, h + + mergeRound(v1) + mergeRound(v2) + mergeRound(v3) + mergeRound(v4) + +afterLoop: + ADD len, h + + TBZ $4, len, try8 + LDP.P 16(p), (x1, x2) + + round0(x1) + ROR $64-27, h + EOR x1 @> 64-27, h, h + MADD h, prime4, prime1, h + + round0(x2) + ROR $64-27, h + EOR x2 @> 64-27, h + MADD h, prime4, prime1, h + +try8: + TBZ $3, len, try4 + MOVD.P 8(p), x1 + + round0(x1) + ROR $64-27, h + EOR x1 @> 64-27, h + MADD h, prime4, prime1, h + +try4: + TBZ $2, len, try2 + MOVWU.P 4(p), x2 + + MUL prime1, x2 + ROR $64-23, h + EOR x2 @> 64-23, h + MADD h, prime3, prime2, h + +try2: + TBZ $1, len, try1 + MOVHU.P 2(p), x3 + AND $255, x3, x1 + LSR $8, x3, x2 + + MUL prime5, x1 + ROR $64-11, h + EOR x1 @> 64-11, h + MUL prime1, h + + MUL prime5, x2 + ROR $64-11, h + EOR x2 @> 64-11, h + MUL prime1, h + +try1: + TBZ $0, len, end + MOVBU (p), x4 + + MUL prime5, x4 + ROR $64-11, h + EOR x4 @> 64-11, h + MUL prime1, h + +end: + EOR h >> 33, h + MUL prime2, h + EOR h >> 29, h + MUL prime3, h + EOR h >> 32, h + + MOVD h, ret+24(FP) + RET + +// func writeBlocks(d *Digest, b []byte) int +// +// Assumes len(b) >= 32. +TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40 + LDP primes<>(SB), (prime1, prime2) + + // Load state. Assume v[1-4] are stored contiguously. + MOVD d+0(FP), digest + LDP 0(digest), (v1, v2) + LDP 16(digest), (v3, v4) + + LDP b_base+8(FP), (p, len) + + blocksLoop() + + // Store updated state. + STP (v1, v2), 0(digest) + STP (v3, v4), 16(digest) + + BIC $31, len + MOVD len, ret+32(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go similarity index 54% rename from vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go rename to vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go index 35318d7..1a1fac9 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go @@ -1,6 +1,9 @@ +//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm +// +build amd64 arm64 // +build !appengine // +build gc // +build !purego +// +build !noasm package xxhash @@ -10,4 +13,4 @@ package xxhash func Sum64(b []byte) uint64 //go:noescape -func writeBlocks(*Digest, []byte) int +func writeBlocks(d *Digest, b []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go index 4a5a821..209cb4a 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go @@ -1,4 +1,5 @@ -// +build !amd64 appengine !gc purego +//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm +// +build !amd64,!arm64 appengine !gc purego noasm package xxhash diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 7ff8704..e80139d 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -20,6 +20,10 @@ type seq struct { llCode, mlCode, ofCode uint8 } +type seqVals struct { + ll, ml, mo int +} + func (s seq) String() string { if s.offset <= 3 { if s.offset == 0 { @@ -61,16 +65,19 @@ type sequenceDecs struct { offsets sequenceDec matchLengths sequenceDec prevOffset [3]int - hist []byte dict []byte literals []byte out []byte + nSeqs int + br *bitReader + seqSize int windowSize int maxBits uint8 + maxSyncLen uint64 } // initialize all 3 decoders from the stream input. -func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []byte) error { +func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) error { if err := s.litLengths.init(br); err != nil { return errors.New("litLengths:" + err.Error()) } @@ -80,8 +87,7 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out [] if err := s.matchLengths.init(br); err != nil { return errors.New("matchLengths:" + err.Error()) } - s.literals = literals - s.hist = hist.b + s.br = br s.prevOffset = hist.recentOffsets s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits s.windowSize = hist.windowSize @@ -93,12 +99,127 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out [] return nil } +// execute will execute the decoded sequence with the provided history. +// The sequence must be evaluated before being sent. +func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { + if len(s.dict) == 0 { + return s.executeSimple(seqs, hist) + } + + // Ensure we have enough output size... + if len(s.out)+s.seqSize > cap(s.out) { + addBytes := s.seqSize + len(s.out) + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + for _, seq := range seqs { + // Add literals + copy(out[t:], s.literals[:seq.ll]) + t += seq.ll + s.literals = s.literals[seq.ll:] + + // Copy from dictionary... + if seq.mo > t+len(hist) || seq.mo > s.windowSize { + if len(s.dict) == 0 { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) + } + + // we may be in dictionary. + dictO := len(s.dict) - (seq.mo - (t + len(hist))) + if dictO < 0 || dictO >= len(s.dict) { + return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict)) + } + end := dictO + seq.ml + if end > len(s.dict) { + n := len(s.dict) - dictO + copy(out[t:], s.dict[dictO:]) + t += n + seq.ml -= n + } else { + copy(out[t:], s.dict[dictO:end]) + t += end - dictO + continue + } + } + + // Copy from history. + if v := seq.mo - t; v > 0 { + // v is the start position in history from end. + start := len(hist) - v + if seq.ml > v { + // Some goes into current block. + // Copy remainder of history + copy(out[t:], hist[start:]) + t += v + seq.ml -= v + } else { + copy(out[t:], hist[start:start+seq.ml]) + t += seq.ml + continue + } + } + // We must be in current buffer now + if seq.ml > 0 { + start := t - seq.mo + if seq.ml <= t-start { + // No overlap + copy(out[t:], out[start:start+seq.ml]) + t += seq.ml + continue + } else { + // Overlapping copy + // Extend destination slice and copy one byte at the time. + src := out[start : start+seq.ml] + dst := out[t:] + dst = dst[:len(src)] + t += len(src) + // Destination is the space we just added. + for i := range src { + dst[i] = src[i] + } + } + } + } + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} + // decode sequences from the stream with the provided history. -func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { +func (s *sequenceDecs) decodeSync(hist []byte) error { + if true { + supported, err := s.decodeSyncSimple(hist) + if supported { + return err + } + } + br := s.br + seqs := s.nSeqs startSize := len(s.out) // Grab full sizes tables, to avoid bounds checks. llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state + out := s.out + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } for i := seqs - 1; i >= 0; i-- { if br.overread() { @@ -151,7 +272,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { if temp == 0 { // 0 is not valid; input is corrupted; force offset to 1 - println("temp was 0") + println("WARNING: temp was 0") temp = 1 } @@ -176,40 +297,49 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { if ll > len(s.literals) { return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals)) } - size := ll + ml + len(s.out) + size := ll + ml + len(out) if size-startSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size", size) + return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize) } - if size > cap(s.out) { - // Not enough size, will be extremely rarely triggered, + if size > cap(out) { + // Not enough size, which can happen under high volume block streaming conditions // but could be if destination slice is too small for sync operations. - // We add maxBlockSize to the capacity. - s.out = append(s.out, make([]byte, maxBlockSize)...) - s.out = s.out[:len(s.out)-maxBlockSize] + // over-allocating here can create a large amount of GC pressure so we try to keep + // it as contained as possible + used := len(out) - startSize + addBytes := 256 + ll + ml + used>>2 + // Clamp to max block size. + if used+addBytes > maxBlockSize { + addBytes = maxBlockSize - used + } + out = append(out, make([]byte, addBytes)...) + out = out[:len(out)-addBytes] } if ml > maxMatchLen { return fmt.Errorf("match len (%d) bigger than max allowed length", ml) } // Add literals - s.out = append(s.out, s.literals[:ll]...) + out = append(out, s.literals[:ll]...) s.literals = s.literals[ll:] - out := s.out - if mo > len(s.out)+len(hist) || mo > s.windowSize { + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + + if mo > len(out)+len(hist) || mo > s.windowSize { if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } // we may be in dictionary. - dictO := len(s.dict) - (mo - (len(s.out) + len(hist))) + dictO := len(s.dict) - (mo - (len(out) + len(hist))) if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } end := dictO + ml if end > len(s.dict) { out = append(out, s.dict[dictO:]...) - mo -= len(s.dict) - dictO ml -= len(s.dict) - dictO } else { out = append(out, s.dict[dictO:end]...) @@ -218,32 +348,27 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { } } - if mo == 0 && ml > 0 { - return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) - } - // Copy from history. // TODO: Blocks without history could be made to ignore this completely. - if v := mo - len(s.out); v > 0 { + if v := mo - len(out); v > 0 { // v is the start position in history from end. - start := len(s.hist) - v + start := len(hist) - v if ml > v { // Some goes into current block. // Copy remainder of history - out = append(out, s.hist[start:]...) - mo -= v + out = append(out, hist[start:]...) ml -= v } else { - out = append(out, s.hist[start:start+ml]...) + out = append(out, hist[start:start+ml]...) ml = 0 } } // We must be in current buffer now if ml > 0 { - start := len(s.out) - mo - if ml <= len(s.out)-start { + start := len(out) - mo + if ml <= len(out)-start { // No overlap - out = append(out, s.out[start:start+ml]...) + out = append(out, out[start:start+ml]...) } else { // Overlapping copy // Extend destination slice and copy one byte at the time. @@ -257,7 +382,6 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { } } } - s.out = out if i == 0 { // This is the last sequence, so we shouldn't update state. break @@ -271,7 +395,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { mlState = mlTable[mlState.newState()&maxTableMask] ofState = ofTable[ofState.newState()&maxTableMask] } else { - bits := br.getBitsFast(nBits) + bits := br.get32BitsFast(nBits) lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) llState = llTable[(llState.newState()+lowBits)&maxTableMask] @@ -284,9 +408,14 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { } } + // Check if space for literals + if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize) + } + // Add final literals - s.out = append(s.out, s.literals...) - return nil + s.out = append(out, s.literals...) + return br.close() } // update states, at least 27 bits must be available. @@ -319,7 +448,7 @@ func (s *sequenceDecs) updateAlt(br *bitReader) { s.offsets.state.state = s.offsets.state.dt[c.newState()] return } - bits := br.getBitsFast(nBits) + bits := br.get32BitsFast(nBits) lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31)) s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits] @@ -450,36 +579,3 @@ func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int { s.prevOffset[0] = temp return temp } - -// mergeHistory will merge history. -func (s *sequenceDecs) mergeHistory(hist *sequenceDecs) (*sequenceDecs, error) { - for i := uint(0); i < 3; i++ { - var sNew, sHist *sequenceDec - switch i { - default: - // same as "case 0": - sNew = &s.litLengths - sHist = &hist.litLengths - case 1: - sNew = &s.offsets - sHist = &hist.offsets - case 2: - sNew = &s.matchLengths - sHist = &hist.matchLengths - } - if sNew.repeat { - if sHist.fse == nil { - return nil, fmt.Errorf("sequence stream %d, repeat requested, but no history", i) - } - continue - } - if sNew.fse == nil { - return nil, fmt.Errorf("sequence stream %d, no fse found", i) - } - if sHist.fse != nil && !sHist.fse.preDefined { - fseDecoderPool.Put(sHist.fse) - } - sHist.fse = sNew.fse - } - return hist, nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go new file mode 100644 index 0000000..4676b09 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -0,0 +1,350 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package zstd + +import ( + "fmt" + + "github.com/klauspost/compress/internal/cpuinfo" +) + +type decodeSyncAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + litRemain int + out []byte + outPosition int + literals []byte + litPosition int + history []byte + windowSize int + ll int // set on error (not for all errors, please refer to _generate/gen.go) + ml int // set on error (not for all errors, please refer to _generate/gen.go) + mo int // set on error (not for all errors, please refer to _generate/gen.go) +} + +// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer. +//go:noescape +func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer. +//go:noescape +func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// decode sequences from the stream with the provided history but without a dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + if len(s.dict) > 0 { + return false, nil + } + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize { + return false, nil + } + useSafe := false + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc { + useSafe = true + } + if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { + useSafe = true + } + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeSyncAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + iteration: s.nSeqs - 1, + litRemain: len(s.literals), + out: s.out, + outPosition: len(s.out), + literals: s.literals, + windowSize: s.windowSize, + history: hist, + } + + s.seqSize = 0 + startSize := len(s.out) + + var errCode int + if cpuinfo.HasBMI2() { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx) + } + } else { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx) + } + } + switch errCode { + case noError: + break + + case errorMatchLenOfsMismatch: + return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml) + + case errorMatchLenTooBig: + return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml) + + case errorMatchOffTooBig: + return true, fmt.Errorf("match offset (%d) bigger than current history (%d)", + ctx.mo, ctx.outPosition+len(hist)-startSize) + + case errorNotEnoughLiterals: + return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", + ctx.ll, ctx.litRemain+ctx.ll) + + case errorNotEnoughSpace: + size := ctx.outPosition + ctx.ll + ctx.ml + if debugDecoder { + println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize) + } + return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize) + + default: + return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + return true, err + } + + s.literals = s.literals[ctx.litPosition:] + t := ctx.outPosition + s.out = s.out[:t] + + // Add final literals + s.out = append(s.out, s.literals...) + if debugDecoder { + t += len(s.literals) + if t != len(s.out) { + panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t)) + } + } + + return true, nil +} + +// -------------------------------------------------------------------------------- + +type decodeAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + seqs []seqVals + litRemain int +} + +const noError = 0 + +// error reported when mo == 0 && ml > 0 +const errorMatchLenOfsMismatch = 1 + +// error reported when ml > maxMatchLen +const errorMatchLenTooBig = 2 + +// error reported when mo > available history or mo > s.windowSize +const errorMatchOffTooBig = 3 + +// error reported when the sum of literal lengths exeeceds the literal buffer size +const errorNotEnoughLiterals = 4 + +// error reported when capacity of `out` is too small +const errorNotEnoughSpace = 5 + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + seqs: seqs, + iteration: len(seqs) - 1, + litRemain: len(s.literals), + } + + s.seqSize = 0 + lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56 + var errCode int + if cpuinfo.HasBMI2() { + if lte56bits { + errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_bmi2(s, br, &ctx) + } + } else { + if lte56bits { + errCode = sequenceDecs_decode_56_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_amd64(s, br, &ctx) + } + } + if errCode != 0 { + i := len(seqs) - ctx.iteration - 1 + switch errCode { + case errorMatchLenOfsMismatch: + ml := ctx.seqs[i].ml + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + + case errorMatchLenTooBig: + ml := ctx.seqs[i].ml + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + + case errorNotEnoughLiterals: + ll := ctx.seqs[i].ll + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll) + } + + return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + } + + if ctx.litRemain < 0 { + return fmt.Errorf("literal count is too big: total available %d, total requested %d", + len(s.literals), len(s.literals)-ctx.litRemain) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// -------------------------------------------------------------------------------- + +type executeAsmContext struct { + seqs []seqVals + seqIndex int + out []byte + history []byte + literals []byte + outPosition int + litPosition int + windowSize int +} + +// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm. +// +// Returns false if a match offset is too big. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool + +// executeSimple handles cases when dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) { + addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + ctx := executeAsmContext{ + seqs: seqs, + seqIndex: 0, + out: out, + history: hist, + outPosition: t, + litPosition: 0, + literals: s.literals, + windowSize: s.windowSize, + } + + ok := sequenceDecs_executeSimple_amd64(&ctx) + if !ok { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", + seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist)) + } + s.literals = s.literals[ctx.litPosition:] + t = ctx.outPosition + + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s new file mode 100644 index 0000000..2585b2e --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -0,0 +1,3517 @@ +// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT. + +//go:build !appengine && !noasm && gc && !noasm +// +build !appengine,!noasm,gc,!noasm + +// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_end + +sequenceDecs_decode_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_byte_by_byte + +sequenceDecs_decode_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 8(R10) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_2_end + +sequenceDecs_decode_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte + +sequenceDecs_decode_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, DI + +sequenceDecs_decode_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R8 + +sequenceDecs_decode_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R9 + +sequenceDecs_decode_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_amd64_adjust_end + +sequenceDecs_decode_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_amd64_adjust_offset_nonzero + +sequenceDecs_decode_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_amd64_adjust_end + +sequenceDecs_decode_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_amd64_adjust_zero + JEQ sequenceDecs_decode_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_amd64_adjust_three + JMP sequenceDecs_decode_amd64_adjust_two + +sequenceDecs_decode_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_amd64_adjust_end: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_56_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_56_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_56_amd64_fill_end + +sequenceDecs_decode_56_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_56_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_56_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte + +sequenceDecs_decode_56_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 8(R10) + + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, DI + +sequenceDecs_decode_56_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R8 + +sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R9 + +sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_56_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_56_amd64_adjust_end + +sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_56_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_amd64_adjust_offset_nonzero + +sequenceDecs_decode_56_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_56_amd64_adjust_end + +sequenceDecs_decode_56_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_amd64_adjust_zero + JEQ sequenceDecs_decode_56_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_amd64_adjust_three + JMP sequenceDecs_decode_56_amd64_adjust_two + +sequenceDecs_decode_56_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_56_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_56_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_56_amd64_adjust_end: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_56_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_56_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_end + +sequenceDecs_decode_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_byte_by_byte + +sequenceDecs_decode_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_2_end + +sequenceDecs_decode_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte + +sequenceDecs_decode_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decode_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_bmi2_adjust_end + +sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_bmi2_adjust_end + +sequenceDecs_decode_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_bmi2_adjust_zero + JEQ sequenceDecs_decode_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_bmi2_adjust_three + JMP sequenceDecs_decode_bmi2_adjust_two + +sequenceDecs_decode_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_bmi2_adjust_end: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_56_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_56_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_56_bmi2_fill_end + +sequenceDecs_decode_56_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_56_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_56_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte + +sequenceDecs_decode_56_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decode_56_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_56_bmi2_adjust_end + +sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_56_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_56_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_56_bmi2_adjust_end + +sequenceDecs_decode_56_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_bmi2_adjust_zero + JEQ sequenceDecs_decode_56_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_bmi2_adjust_three + JMP sequenceDecs_decode_56_bmi2_adjust_two + +sequenceDecs_decode_56_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_56_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_56_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_56_bmi2_adjust_end: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_56_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_56_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool +// Requires: SSE +TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 + MOVQ ctx+0(FP), R10 + MOVQ 8(R10), CX + TESTQ CX, CX + JZ empty_seqs + MOVQ (R10), AX + MOVQ 24(R10), DX + MOVQ 32(R10), BX + MOVQ 80(R10), SI + MOVQ 104(R10), DI + MOVQ 120(R10), R8 + MOVQ 56(R10), R9 + MOVQ 64(R10), R10 + ADDQ R10, R9 + + // seqsBase += 24 * seqIndex + LEAQ (DX)(DX*2), R11 + SHLQ $0x03, R11 + ADDQ R11, AX + + // outBase += outPosition + ADDQ DI, BX + +main_loop: + MOVQ (AX), R11 + MOVQ 16(AX), R12 + MOVQ 8(AX), R13 + + // Copy literals + TESTQ R11, R11 + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, R11 + JZ copy_1_word + MOVB (SI)(R14*1), R15 + MOVB R15, (BX)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, R11 + JZ copy_1_dword + MOVW (SI)(R14*1), R15 + MOVW R15, (BX)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, R11 + JZ copy_1_qword + MOVL (SI)(R14*1), R15 + MOVL R15, (BX)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, R11 + JZ copy_1_test + MOVQ (SI)(R14*1), R15 + MOVQ R15, (BX)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (SI)(R14*1), X0 + MOVUPS X0, (BX)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, R11 + JB copy_1 + ADDQ R11, SI + ADDQ R11, BX + ADDQ R11, DI + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + LEAQ (DI)(R10*1), R11 + CMPQ R12, R11 + JG error_match_off_too_big + CMPQ R12, R8 + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JGE copy_all_from_history + XORQ R11, R11 + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(R11*1), R12 + MOVB R12, (BX)(R11*1) + ADDQ $0x01, R11 + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(R11*1), R12 + MOVW R12, (BX)(R11*1) + ADDQ $0x02, R11 + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(R11*1), R12 + MOVL R12, (BX)(R11*1) + ADDQ $0x04, R11 + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(R11*1), R12 + MOVQ R12, (BX)(R11*1) + ADDQ $0x08, R11 + JMP copy_4_test + +copy_4: + MOVUPS (R14)(R11*1), X0 + MOVUPS X0, (BX)(R11*1) + ADDQ $0x10, R11 + +copy_4_test: + CMPQ R11, R13 + JB copy_4 + ADDQ R13, DI + ADDQ R13, BX + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, R11 + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (BX)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, R11 + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (BX)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, R11 + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (BX)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, R11 + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (BX)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (BX)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, R11 + JB copy_5 + ADDQ R11, BX + ADDQ R11, DI + SUBQ R11, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ BX, R11 + SUBQ R12, R11 + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, DI + MOVQ BX, R12 + ADDQ R13, BX + +copy_2: + MOVUPS (R11), X0 + MOVUPS X0, (R12) + ADDQ $0x10, R11 + ADDQ $0x10, R12 + SUBQ $0x10, R13 + JHI copy_2 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, DI + +copy_slow_3: + MOVB (R11), R12 + MOVB R12, (BX) + INCQ R11 + INCQ BX + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + +loop_finished: + // Return value + MOVB $0x01, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +error_match_off_too_big: + // Return value + MOVB $0x00, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +empty_seqs: + // Return value + MOVB $0x01, ret+8(FP) + RET + +// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_end + +sequenceDecs_decodeSync_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_2_end + +sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, DI + +sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R8 + +sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R9 + +sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_amd64_adjust_end + +sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_amd64_adjust_end + +sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(AX*8), R14 + JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_amd64_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, AX + JZ copy_1_word + MOVB (R11)(R14*1), R15 + MOVB R15, (R10)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, AX + JZ copy_1_dword + MOVW (R11)(R14*1), R15 + MOVW R15, (R10)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, AX + JZ copy_1_qword + MOVL (R11)(R14*1), R15 + MOVL R15, (R10)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, AX + JZ copy_1_test + MOVQ (R11)(R14*1), R15 + MOVQ R15, (R10)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R11)(R14*1), X0 + MOVUPS X0, (R10)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JGE copy_all_from_history + XORQ AX, AX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(AX*1), CL + MOVB CL, (R10)(AX*1) + ADDQ $0x01, AX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(AX*1), CX + MOVW CX, (R10)(AX*1) + ADDQ $0x02, AX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(AX*1), CX + MOVL CX, (R10)(AX*1) + ADDQ $0x04, AX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(AX*1), CX + MOVQ CX, (R10)(AX*1) + ADDQ $0x08, AX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(AX*1), X0 + MOVUPS X0, (R10)(AX*1) + ADDQ $0x10, AX + +copy_4_test: + CMPQ AX, R13 + JB copy_4 + ADDQ R13, R12 + ADDQ R13, R10 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, AX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R10)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, AX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R10)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, AX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R10)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, AX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R10)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R10)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, AX + JB copy_5 + ADDQ AX, R10 + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R12 + MOVQ R10, CX + ADDQ R13, R10 + +copy_2: + MOVUPS (AX), X0 + MOVUPS X0, (CX) + ADDQ $0x10, AX + ADDQ $0x10, CX + SUBQ $0x10, R13 + JHI copy_2 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R12 + +copy_slow_3: + MOVB (AX), CL + MOVB CL, (R10) + INCQ AX + INCQ R10 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_end + +sequenceDecs_decodeSync_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_2_end + +sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decodeSync_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_bmi2_adjust_end + +sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_bmi2_adjust_end + +sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(R12*8), R14 + JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_bmi2_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, CX + JZ copy_1_word + MOVB (R10)(R14*1), R15 + MOVB R15, (R9)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, CX + JZ copy_1_dword + MOVW (R10)(R14*1), R15 + MOVW R15, (R9)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, CX + JZ copy_1_qword + MOVL (R10)(R14*1), R15 + MOVL R15, (R9)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, CX + JZ copy_1_test + MOVQ (R10)(R14*1), R15 + MOVQ R15, (R9)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R10)(R14*1), X0 + MOVUPS X0, (R9)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JGE copy_all_from_history + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(CX*1), R12 + MOVB R12, (R9)(CX*1) + ADDQ $0x01, CX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(CX*1), R12 + MOVW R12, (R9)(CX*1) + ADDQ $0x02, CX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(CX*1), R12 + MOVL R12, (R9)(CX*1) + ADDQ $0x04, CX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(CX*1), R12 + MOVQ R12, (R9)(CX*1) + ADDQ $0x08, CX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(CX*1), X0 + MOVUPS X0, (R9)(CX*1) + ADDQ $0x10, CX + +copy_4_test: + CMPQ CX, R13 + JB copy_4 + ADDQ R13, R11 + ADDQ R13, R9 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, CX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R9)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, CX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R9)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, CX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R9)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, CX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R9)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R9)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, CX + JB copy_5 + ADDQ CX, R9 + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R11 + MOVQ R9, R12 + ADDQ R13, R9 + +copy_2: + MOVUPS (CX), X0 + MOVUPS X0, (R12) + ADDQ $0x10, CX + ADDQ $0x10, R12 + SUBQ $0x10, R13 + JHI copy_2 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R11 + +copy_slow_3: + MOVB (CX), R12 + MOVB R12, (R9) + INCQ CX + INCQ R9 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_safe_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_end + +sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_end + +sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, DI + +sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R8 + +sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R9 + +sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_safe_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + +sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(AX*8), R14 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_safe_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_amd64_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, AX + JZ copy_1_word + MOVB (R11)(R14*1), R15 + MOVB R15, (R10)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, AX + JZ copy_1_dword + MOVW (R11)(R14*1), R15 + MOVW R15, (R10)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, AX + JZ copy_1_qword + MOVL (R11)(R14*1), R15 + MOVL R15, (R10)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, AX + JZ copy_1_test + MOVQ (R11)(R14*1), R15 + MOVQ R15, (R10)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R11)(R14*1), X0 + MOVUPS X0, (R10)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JGE copy_all_from_history + XORQ AX, AX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(AX*1), CL + MOVB CL, (R10)(AX*1) + ADDQ $0x01, AX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(AX*1), CX + MOVW CX, (R10)(AX*1) + ADDQ $0x02, AX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(AX*1), CX + MOVL CX, (R10)(AX*1) + ADDQ $0x04, AX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(AX*1), CX + MOVQ CX, (R10)(AX*1) + ADDQ $0x08, AX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(AX*1), X0 + MOVUPS X0, (R10)(AX*1) + ADDQ $0x10, AX + +copy_4_test: + CMPQ AX, R13 + JB copy_4 + ADDQ R13, R12 + ADDQ R13, R10 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, AX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R10)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, AX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R10)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, AX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R10)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, AX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R10)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R10)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, AX + JB copy_5 + ADDQ AX, R10 + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R12 + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_2_word + MOVB (AX)(CX*1), R14 + MOVB R14, (R10)(CX*1) + ADDQ $0x01, CX + +copy_2_word: + TESTQ $0x00000002, R13 + JZ copy_2_dword + MOVW (AX)(CX*1), R14 + MOVW R14, (R10)(CX*1) + ADDQ $0x02, CX + +copy_2_dword: + TESTQ $0x00000004, R13 + JZ copy_2_qword + MOVL (AX)(CX*1), R14 + MOVL R14, (R10)(CX*1) + ADDQ $0x04, CX + +copy_2_qword: + TESTQ $0x00000008, R13 + JZ copy_2_test + MOVQ (AX)(CX*1), R14 + MOVQ R14, (R10)(CX*1) + ADDQ $0x08, CX + JMP copy_2_test + +copy_2: + MOVUPS (AX)(CX*1), X0 + MOVUPS X0, (R10)(CX*1) + ADDQ $0x10, CX + +copy_2_test: + CMPQ CX, R13 + JB copy_2 + ADDQ R13, R10 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R12 + +copy_slow_3: + MOVB (AX), CL + MOVB CL, (R10) + INCQ AX + INCQ R10 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_safe_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_safe_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_end + +sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_end + +sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decodeSync_safe_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + +sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(R12*8), R14 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_safe_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_bmi2_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, CX + JZ copy_1_word + MOVB (R10)(R14*1), R15 + MOVB R15, (R9)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, CX + JZ copy_1_dword + MOVW (R10)(R14*1), R15 + MOVW R15, (R9)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, CX + JZ copy_1_qword + MOVL (R10)(R14*1), R15 + MOVL R15, (R9)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, CX + JZ copy_1_test + MOVQ (R10)(R14*1), R15 + MOVQ R15, (R9)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R10)(R14*1), X0 + MOVUPS X0, (R9)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JGE copy_all_from_history + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(CX*1), R12 + MOVB R12, (R9)(CX*1) + ADDQ $0x01, CX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(CX*1), R12 + MOVW R12, (R9)(CX*1) + ADDQ $0x02, CX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(CX*1), R12 + MOVL R12, (R9)(CX*1) + ADDQ $0x04, CX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(CX*1), R12 + MOVQ R12, (R9)(CX*1) + ADDQ $0x08, CX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(CX*1), X0 + MOVUPS X0, (R9)(CX*1) + ADDQ $0x10, CX + +copy_4_test: + CMPQ CX, R13 + JB copy_4 + ADDQ R13, R11 + ADDQ R13, R9 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, CX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R9)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, CX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R9)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, CX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R9)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, CX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R9)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R9)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, CX + JB copy_5 + ADDQ CX, R9 + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R11 + XORQ R12, R12 + TESTQ $0x00000001, R13 + JZ copy_2_word + MOVB (CX)(R12*1), R14 + MOVB R14, (R9)(R12*1) + ADDQ $0x01, R12 + +copy_2_word: + TESTQ $0x00000002, R13 + JZ copy_2_dword + MOVW (CX)(R12*1), R14 + MOVW R14, (R9)(R12*1) + ADDQ $0x02, R12 + +copy_2_dword: + TESTQ $0x00000004, R13 + JZ copy_2_qword + MOVL (CX)(R12*1), R14 + MOVL R14, (R9)(R12*1) + ADDQ $0x04, R12 + +copy_2_qword: + TESTQ $0x00000008, R13 + JZ copy_2_test + MOVQ (CX)(R12*1), R14 + MOVQ R14, (R9)(R12*1) + ADDQ $0x08, R12 + JMP copy_2_test + +copy_2: + MOVUPS (CX)(R12*1), X0 + MOVUPS X0, (R9)(R12*1) + ADDQ $0x10, R12 + +copy_2_test: + CMPQ R12, R13 + JB copy_2 + ADDQ R13, R9 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R11 + +copy_slow_3: + MOVB (CX), R12 + MOVB R12, (R9) + INCQ CX + INCQ R9 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_safe_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go new file mode 100644 index 0000000..c3452bc --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go @@ -0,0 +1,237 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +package zstd + +import ( + "fmt" + "io" +) + +// decode sequences from the stream with the provided history but without dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + return false, nil +} + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + // Grab full sizes tables, to avoid bounds checks. + llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] + llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state + s.seqSize = 0 + litRemain := len(s.literals) + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + for i := range seqs { + var ll, mo, ml int + if br.off > 4+((maxOffsetBits+16+16)>>3) { + // inlined function: + // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) + + // Final will not read from stream. + var llB, mlB, moB uint8 + ll, llB = llState.final() + ml, mlB = mlState.final() + mo, moB = ofState.final() + + // extra bits are stored in reverse order. + br.fillFast() + mo += br.getBits(moB) + if s.maxBits > 32 { + br.fillFast() + } + ml += br.getBits(mlB) + ll += br.getBits(llB) + + if moB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = mo + } else { + // mo = s.adjustOffset(mo, ll, moB) + // Inlined for rather big speedup + if ll == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. + mo++ + } + + if mo == 0 { + mo = s.prevOffset[0] + } else { + var temp int + if mo == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[mo] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("WARNING: temp was 0") + temp = 1 + } + + if mo != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + mo = temp + } + } + br.fillFast() + } else { + if br.overread() { + if debugDecoder { + printf("reading sequence %d, exceeded available data\n", i) + } + return io.ErrUnexpectedEOF + } + ll, mo, ml = s.next(br, llState, mlState, ofState) + br.fill() + } + + if debugSequences { + println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) + } + // Evaluate. + // We might be doing this async, so do it early. + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + if ml > maxMatchLen { + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + } + s.seqSize += ll + ml + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + litRemain -= ll + if litRemain < 0 { + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll) + } + seqs[i] = seqVals{ + ll: ll, + ml: ml, + mo: mo, + } + if i == len(seqs)-1 { + // This is the last sequence, so we shouldn't update state. + break + } + + // Manually inlined, ~ 5-20% faster + // Update all 3 states at once. Approx 20% faster. + nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() + if nBits == 0 { + llState = llTable[llState.newState()&maxTableMask] + mlState = mlTable[mlState.newState()&maxTableMask] + ofState = ofTable[ofState.newState()&maxTableMask] + } else { + bits := br.get32BitsFast(nBits) + lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) + llState = llTable[(llState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits >> (ofState.nbBits() & 31)) + lowBits &= bitMask[mlState.nbBits()&15] + mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] + ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] + } + } + s.seqSize += litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// executeSimple handles cases when a dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize > cap(s.out) { + addBytes := s.seqSize + len(s.out) + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + for _, seq := range seqs { + // Add literals + copy(out[t:], s.literals[:seq.ll]) + t += seq.ll + s.literals = s.literals[seq.ll:] + + // Malformed input + if seq.mo > t+len(hist) || seq.mo > s.windowSize { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) + } + + // Copy from history. + if v := seq.mo - t; v > 0 { + // v is the start position in history from end. + start := len(hist) - v + if seq.ml > v { + // Some goes into the current block. + // Copy remainder of history + copy(out[t:], hist[start:]) + t += v + seq.ml -= v + } else { + copy(out[t:], hist[start:start+seq.ml]) + t += seq.ml + continue + } + } + + // We must be in the current buffer now + if seq.ml > 0 { + start := t - seq.mo + if seq.ml <= t-start { + // No overlap + copy(out[t:], out[start:start+seq.ml]) + t += seq.ml + } else { + // Overlapping copy + // Extend destination slice and copy one byte at the time. + src := out[start : start+seq.ml] + dst := out[t:] + dst = dst[:len(src)] + t += len(src) + // Destination is the space we just added. + for i := range src { + dst[i] = src[i] + } + } + } + } + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go index 36bcc3c..8014174 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqenc.go +++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go @@ -35,7 +35,6 @@ func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) { // Ensure we cannot reuse by accident prevEnc := *prev prevEnc.symbolLen = 0 - return } compareSwap(ll, &s.llEnc, &s.llPrev) compareSwap(ml, &s.mlEnc, &s.mlPrev) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 690428c..9e1baad 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -11,7 +11,7 @@ import ( "io" "github.com/klauspost/compress/huff0" - "github.com/klauspost/compress/snappy" + snappy "github.com/klauspost/compress/internal/snapref" ) const ( @@ -111,7 +111,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { // Add empty last block r.block.reset(nil) r.block.last = true - err := r.block.encodeLits(false) + err := r.block.encodeLits(r.block.literals, false) if err != nil { return written, err } @@ -178,17 +178,16 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { r.err = ErrSnappyCorrupt return written, r.err } - err = r.block.encode(false, false) + err = r.block.encode(nil, false, false) switch err { case errIncompressible: r.block.popOffsets() r.block.reset(nil) r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen]) if err != nil { - println("snappy.Decode:", err) return written, err } - err = r.block.encodeLits(false) + err = r.block.encodeLits(r.block.literals, false) if err != nil { return written, err } @@ -204,7 +203,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { written += int64(n) continue case chunkTypeUncompressedData: - if debug { + if debugEncoder { println("Uncompressed, chunklen", chunkLen) } // Section 4.3. Uncompressed data (chunk type 0x01). @@ -235,7 +234,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { r.err = ErrSnappyCorrupt return written, r.err } - err := r.block.encodeLits(false) + err := r.block.encodeLits(r.block.literals, false) if err != nil { return written, err } @@ -247,7 +246,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { continue case chunkTypeStreamIdentifier: - if debug { + if debugEncoder { println("stream id", chunkLen, len(snappyMagicBody)) } // Section 4.1. Stream identifier (chunk type 0xff). @@ -417,7 +416,7 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli) // https://github.com/google/snappy/blob/master/framing_format.txt func snappyCRC(b []byte) uint32 { c := crc32.Update(0, crcTable, b) - return uint32(c>>15|c<<17) + 0xa282ead8 + return c>>15 | c<<17 + 0xa282ead8 } // snappyDecodedLen returns the length of the decoded block and the number of bytes diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go new file mode 100644 index 0000000..b53f606 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -0,0 +1,134 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +import ( + "errors" + "io" + "sync" +) + +// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip. +// See https://www.winzip.com/win/en/comp_info.html +const ZipMethodWinZip = 93 + +// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression. +// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression. +// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT +const ZipMethodPKWare = 20 + +var zipReaderPool sync.Pool + +// newZipReader creates a pooled zip decompressor. +func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser { + pool := &zipReaderPool + if len(opts) > 0 { + opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...) + // Force concurrency 1 + opts = append(opts, WithDecoderConcurrency(1)) + // Create our own pool + pool = &sync.Pool{} + } + return func(r io.Reader) io.ReadCloser { + dec, ok := pool.Get().(*Decoder) + if ok { + dec.Reset(r) + } else { + d, err := NewReader(r, opts...) + if err != nil { + panic(err) + } + dec = d + } + return &pooledZipReader{dec: dec, pool: pool} + } +} + +type pooledZipReader struct { + mu sync.Mutex // guards Close and Read + pool *sync.Pool + dec *Decoder +} + +func (r *pooledZipReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.dec == nil { + return 0, errors.New("read after close or EOF") + } + dec, err := r.dec.Read(p) + if err == io.EOF { + r.dec.Reset(nil) + r.pool.Put(r.dec) + r.dec = nil + } + return dec, err +} + +func (r *pooledZipReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.dec != nil { + err = r.dec.Reset(nil) + r.pool.Put(r.dec) + r.dec = nil + } + return err +} + +type pooledZipWriter struct { + mu sync.Mutex // guards Close and Read + enc *Encoder + pool *sync.Pool +} + +func (w *pooledZipWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.enc == nil { + return 0, errors.New("Write after Close") + } + return w.enc.Write(p) +} + +func (w *pooledZipWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + var err error + if w.enc != nil { + err = w.enc.Close() + w.pool.Put(w.enc) + w.enc = nil + } + return err +} + +// ZipCompressor returns a compressor that can be registered with zip libraries. +// The provided encoder options will be used on all encodes. +func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { + var pool sync.Pool + return func(w io.Writer) (io.WriteCloser, error) { + enc, ok := pool.Get().(*Encoder) + if ok { + enc.Reset(w) + } else { + var err error + enc, err = NewWriter(w, opts...) + if err != nil { + return nil, err + } + } + return &pooledZipWriter{enc: enc, pool: &pool}, nil + } +} + +// ZipDecompressor returns a decompressor that can be registered with zip libraries. +// See ZipCompressor for example. +// Options can be specified. WithDecoderConcurrency(1) is forced, +// and by default a 128MB maximum decompression window is specified. +// The window size can be overridden if required. +func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser { + return newZipReader(opts...) +} diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 0807719..c1c90b4 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -4,6 +4,8 @@ package zstd import ( + "bytes" + "encoding/binary" "errors" "log" "math" @@ -13,6 +15,12 @@ import ( // enable debug printing const debug = false +// enable encoding debug printing +const debugEncoder = debug + +// enable decoding debug printing +const debugDecoder = debug + // Enable extra assertions. const debugAsserts = debug || false @@ -31,6 +39,9 @@ const zstdMinMatch = 3 // Reset the buffer offset when reaching this. const bufferReset = math.MaxInt32 - MaxWindowSize +// fcsUnknown is used for unknown frame content size. +const fcsUnknown = math.MaxUint64 + var ( // ErrReservedBlockType is returned when a reserved block type is found. // Typically this indicates wrong or corrupted input. @@ -44,6 +55,10 @@ var ( // Typically returned on invalid input. ErrBlockTooSmall = errors.New("block too small") + // ErrUnexpectedBlockSize is returned when a block has unexpected size. + // Typically returned on invalid input. + ErrUnexpectedBlockSize = errors.New("unexpected block size") + // ErrMagicMismatch is returned when a "magic" number isn't what is expected. // Typically this indicates wrong or corrupted input. ErrMagicMismatch = errors.New("invalid input: magic number mismatch") @@ -67,22 +82,30 @@ var ( // This is only returned if SingleSegment is specified on the frame. ErrFrameSizeExceeded = errors.New("frame size exceeded") + // ErrFrameSizeMismatch is returned if the stated frame size does not match the expected size. + // This is only returned if SingleSegment is specified on the frame. + ErrFrameSizeMismatch = errors.New("frame size does not match size on stream") + // ErrCRCMismatch is returned if CRC mismatches. ErrCRCMismatch = errors.New("CRC check failed") // ErrDecoderClosed will be returned if the Decoder was used after // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + + // ErrDecoderNilInput is returned when a nil Reader was provided + // and an operation other than Reset/DecodeAll/Close was attempted. + ErrDecoderNilInput = errors.New("nil input provided as reader") ) func println(a ...interface{}) { - if debug { + if debug || debugDecoder || debugEncoder { log.Println(a...) } } func printf(format string, a ...interface{}) { - if debug { + if debug || debugDecoder || debugEncoder { log.Printf(format, a...) } } @@ -121,24 +144,20 @@ func matchLen(a, b []byte) int { } func load3232(b []byte, i int32) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return binary.LittleEndian.Uint32(b[i:]) } func load6432(b []byte, i int32) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func load64(b []byte, i int) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } + +type byter interface { + Bytes() []byte + Len() int +} + +var _ byter = &bytes.Buffer{} diff --git a/vendor/github.com/klauspost/pgzip/.travis.yml b/vendor/github.com/klauspost/pgzip/.travis.yml index 6e9fca0..acfec4b 100644 --- a/vendor/github.com/klauspost/pgzip/.travis.yml +++ b/vendor/github.com/klauspost/pgzip/.travis.yml @@ -1,19 +1,22 @@ language: go -sudo: false - os: - linux - osx go: - - 1.9.x - - 1.10.x + - 1.13.x + - 1.14.x + - 1.15.x - master -script: - - go test -v -cpu=1,2,4 . - - go test -v -cpu=2 -race -short . +env: + - GO111MODULE=off + +script: + - diff <(gofmt -d .) <(printf "") + - go test -v -cpu=1,2,4 . + - go test -v -cpu=2 -race -short . matrix: allow_failures: diff --git a/vendor/github.com/klauspost/pgzip/gunzip.go b/vendor/github.com/klauspost/pgzip/gunzip.go index 93efec7..d1ae730 100644 --- a/vendor/github.com/klauspost/pgzip/gunzip.go +++ b/vendor/github.com/klauspost/pgzip/gunzip.go @@ -331,6 +331,16 @@ func (z *Reader) killReadAhead() error { // Wait for decompressor to be closed and return error, if any. e, ok := <-z.closeErr z.activeRA = false + + for blk := range z.readAhead { + if blk.b != nil { + z.blockPool <- blk.b + } + } + if cap(z.current) > 0 { + z.blockPool <- z.current + z.current = nil + } if !ok { // Channel is closed, so if there was any error it has already been returned. return nil @@ -418,6 +428,7 @@ func (z *Reader) doReadAhead() { case z.readAhead <- read{b: buf, err: err}: case <-closeReader: // Sent on close, we don't care about the next results + z.blockPool <- buf return } if err != nil { diff --git a/vendor/github.com/magiconair/properties/.travis.yml b/vendor/github.com/magiconair/properties/.travis.yml index 6cee379..baf9031 100644 --- a/vendor/github.com/magiconair/properties/.travis.yml +++ b/vendor/github.com/magiconair/properties/.travis.yml @@ -12,4 +12,6 @@ go: - "1.12.x" - "1.13.x" - "1.14.x" + - "1.15.x" + - "1.16.x" - tip diff --git a/vendor/github.com/magiconair/properties/CHANGELOG.md b/vendor/github.com/magiconair/properties/CHANGELOG.md index 176626a..ff8d025 100644 --- a/vendor/github.com/magiconair/properties/CHANGELOG.md +++ b/vendor/github.com/magiconair/properties/CHANGELOG.md @@ -1,8 +1,29 @@ ## Changelog +### [1.8.2](https://github.com/magiconair/properties/tree/v1.8.2) - 25 Aug 2020 + + * [PR #36](https://github.com/magiconair/properties/pull/36): Escape backslash on write + + This patch ensures that backslashes are escaped on write. Existing applications which + rely on the old behavior may need to be updated. + + Thanks to [@apesternikov](https://github.com/apesternikov) for the patch. + + * [PR #42](https://github.com/magiconair/properties/pull/42): Made Content-Type check whitespace agnostic in LoadURL() + + Thanks to [@aliras1](https://github.com/aliras1) for the patch. + + * [PR #41](https://github.com/magiconair/properties/pull/41): Make key/value separator configurable on Write() + + Thanks to [@mkjor](https://github.com/mkjor) for the patch. + + * [PR #40](https://github.com/magiconair/properties/pull/40): Add method to return a sorted list of keys + + Thanks to [@mkjor](https://github.com/mkjor) for the patch. + ### [1.8.1](https://github.com/magiconair/properties/tree/v1.8.1) - 10 May 2019 - * [PR #26](https://github.com/magiconair/properties/pull/35): Close body always after request + * [PR #35](https://github.com/magiconair/properties/pull/35): Close body always after request This patch ensures that in `LoadURL` the response body is always closed. diff --git a/vendor/github.com/magiconair/properties/README.md b/vendor/github.com/magiconair/properties/README.md index 42ed5c3..e2edda0 100644 --- a/vendor/github.com/magiconair/properties/README.md +++ b/vendor/github.com/magiconair/properties/README.md @@ -1,6 +1,5 @@ [![](https://img.shields.io/github/tag/magiconair/properties.svg?style=flat-square&label=release)](https://github.com/magiconair/properties/releases) [![Travis CI Status](https://img.shields.io/travis/magiconair/properties.svg?branch=master&style=flat-square&label=travis)](https://travis-ci.org/magiconair/properties) -[![CircleCI Status](https://img.shields.io/circleci/project/github/magiconair/properties.svg?label=circle+ci&style=flat-square)](https://circleci.com/gh/magiconair/properties) [![License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg?style=flat-square)](https://raw.githubusercontent.com/magiconair/properties/master/LICENSE) [![GoDoc](http://img.shields.io/badge/godoc-reference-5272B4.svg?style=flat-square)](http://godoc.org/github.com/magiconair/properties) diff --git a/vendor/github.com/magiconair/properties/lex.go b/vendor/github.com/magiconair/properties/lex.go index 367166d..e1e9dd7 100644 --- a/vendor/github.com/magiconair/properties/lex.go +++ b/vendor/github.com/magiconair/properties/lex.go @@ -128,18 +128,6 @@ func (l *lexer) acceptRun(valid string) { l.backup() } -// acceptRunUntil consumes a run of runes up to a terminator. -func (l *lexer) acceptRunUntil(term rune) { - for term != l.next() { - } - l.backup() -} - -// hasText returns true if the current parsed text is not empty. -func (l *lexer) isNotEmpty() bool { - return l.pos > l.start -} - // lineNumber reports which line we're on, based on the position of // the previous item returned by nextItem. Doing it this way // means we don't have to worry about peek double counting. diff --git a/vendor/github.com/magiconair/properties/load.go b/vendor/github.com/magiconair/properties/load.go index ab95325..c83c2da 100644 --- a/vendor/github.com/magiconair/properties/load.go +++ b/vendor/github.com/magiconair/properties/load.go @@ -132,11 +132,12 @@ func (l *Loader) LoadURL(url string) (*Properties, error) { } ct := resp.Header.Get("Content-Type") + ct = strings.Join(strings.Fields(ct), "") var enc Encoding switch strings.ToLower(ct) { - case "text/plain", "text/plain; charset=iso-8859-1", "text/plain; charset=latin1": + case "text/plain", "text/plain;charset=iso-8859-1", "text/plain;charset=latin1": enc = ISO_8859_1 - case "", "text/plain; charset=utf-8": + case "", "text/plain;charset=utf-8": enc = UTF8 default: return nil, fmt.Errorf("properties: invalid content type %s", ct) diff --git a/vendor/github.com/magiconair/properties/parser.go b/vendor/github.com/magiconair/properties/parser.go index cdc4a80..430e4fc 100644 --- a/vendor/github.com/magiconair/properties/parser.go +++ b/vendor/github.com/magiconair/properties/parser.go @@ -59,14 +59,6 @@ func (p *parser) errorf(format string, args ...interface{}) { panic(fmt.Errorf(format, args...)) } -func (p *parser) expect(expected itemType) (token item) { - token = p.lex.nextItem() - if token.typ != expected { - p.unexpected(token) - } - return token -} - func (p *parser) expectOneOf(expected ...itemType) (token item) { token = p.lex.nextItem() for _, v := range expected { @@ -91,5 +83,4 @@ func (p *parser) recover(errp *error) { } *errp = e.(error) } - return } diff --git a/vendor/github.com/magiconair/properties/properties.go b/vendor/github.com/magiconair/properties/properties.go index 2e4c8a2..62ae2d6 100644 --- a/vendor/github.com/magiconair/properties/properties.go +++ b/vendor/github.com/magiconair/properties/properties.go @@ -8,6 +8,7 @@ package properties // BUG(frank): Write() does not allow to configure the newline character. Therefore, on Windows LF is used. import ( + "bytes" "fmt" "io" "log" @@ -115,7 +116,7 @@ func (p *Properties) Get(key string) (value string, ok bool) { // circular references and malformed expressions // so we panic if we still get an error here. if err != nil { - ErrorHandler(fmt.Errorf("%s in %q", err, key+" = "+v)) + ErrorHandler(err) } return expanded, true @@ -636,7 +637,7 @@ func (p *Properties) WriteComment(w io.Writer, prefix string, enc Encoding) (n i } for _, c := range comments { - x, err = fmt.Fprintf(w, "%s%s\n", prefix, encode(c, "", enc)) + x, err = fmt.Fprintf(w, "%s%s\n", prefix, c) if err != nil { return } @@ -766,7 +767,12 @@ func expand(s string, keys []string, prefix, postfix string, values map[string]s for _, k := range keys { if key == k { - return "", fmt.Errorf("circular reference") + var b bytes.Buffer + b.WriteString("circular reference in:\n") + for _, k1 := range keys { + fmt.Fprintf(&b, "%s=%s\n", k1, values[k1]) + } + return "", fmt.Errorf(b.String()) } } @@ -780,7 +786,6 @@ func expand(s string, keys []string, prefix, postfix string, values map[string]s } s = s[:start] + new_val + s[end+1:] } - return s, nil } // encode encodes a UTF-8 string to ISO-8859-1 and escapes some characters. @@ -833,6 +838,8 @@ func escape(r rune, special string) string { return "\\r" case '\t': return "\\t" + case '\\': + return "\\\\" default: if strings.ContainsRune(special, r) { return "\\" + string(r) diff --git a/vendor/github.com/nwaples/rardecode/archive.go b/vendor/github.com/nwaples/rardecode/archive.go index f878751..34e4ac2 100644 --- a/vendor/github.com/nwaples/rardecode/archive.go +++ b/vendor/github.com/nwaples/rardecode/archive.go @@ -146,67 +146,54 @@ type volume struct { old bool // uses old naming scheme } -// nextVolName updates name to the next filename in the archive. -func (v *volume) nextVolName() { - if v.num == 0 { - // check file extensions - i := strings.LastIndex(v.file, ".") - if i < 0 { - // no file extension, add one - i = len(v.file) - v.file += ".rar" - } else { - ext := strings.ToLower(v.file[i+1:]) - // replace with .rar for empty extensions & self extracting archives - if ext == "" || ext == "exe" || ext == "sfx" { - v.file = v.file[:i+1] + "rar" - } - } - if a, ok := v.fileBlockReader.(*archive15); ok { - v.old = a.old - } - // new naming scheme must have volume number in filename - if !v.old && reDigits.FindStringIndex(v.file) == nil { - v.old = true - } - // For old style naming if 2nd and 3rd character of file extension is not a digit replace - // with "00" and ignore any trailing characters. - if v.old && (len(v.file) < i+4 || v.file[i+2] < '0' || v.file[i+2] > '9' || v.file[i+3] < '0' || v.file[i+3] > '9') { - v.file = v.file[:i+2] + "00" - return - } +func (v *volume) openFile(file string) error { + f, err := os.Open(v.dir + file) + if err != nil { + return err } - // new style volume naming - if !v.old { - // find all numbers in volume name - m := reDigits.FindAllStringIndex(v.file, -1) - if l := len(m); l > 1 { - // More than 1 match so assume name.part###of###.rar style. - // Take the last 2 matches where the first is the volume number. - m = m[l-2 : l] - if strings.Contains(v.file[m[0][1]:m[1][0]], ".") || !strings.Contains(v.file[:m[0][0]], ".") { - // Didn't match above style as volume had '.' between the two numbers or didnt have a '.' - // before the first match. Use the second number as volume number. - m = m[1:] - } - } - // extract and increment volume number - lo, hi := m[0][0], m[0][1] - n, err := strconv.Atoi(v.file[lo:hi]) - if err != nil { - n = 0 - } else { - n++ + v.f = f + v.file = file + return nil +} + +func nextNewVolName(file string) string { + // find all numbers in volume name + m := reDigits.FindAllStringIndex(file, -1) + if l := len(m); l > 1 { + // More than 1 match so assume name.part###of###.rar style. + // Take the last 2 matches where the first is the volume number. + m = m[l-2 : l] + if strings.Contains(file[m[0][1]:m[1][0]], ".") || !strings.Contains(file[:m[0][0]], ".") { + // Didn't match above style as volume had '.' between the two numbers or didnt have a '.' + // before the first match. Use the second number as volume number. + m = m[1:] } - // volume number must use at least the same number of characters as previous volume - vol := fmt.Sprintf("%0"+fmt.Sprint(hi-lo)+"d", n) - v.file = v.file[:lo] + vol + v.file[hi:] - return } + // extract and increment volume number + lo, hi := m[0][0], m[0][1] + n, err := strconv.Atoi(file[lo:hi]) + if err != nil { + n = 0 + } else { + n++ + } + // volume number must use at least the same number of characters as previous volume + vol := fmt.Sprintf("%0"+fmt.Sprint(hi-lo)+"d", n) + file = file[:lo] + vol + file[hi:] + return file +} + +func nextOldVolName(file string) string { // old style volume naming - i := strings.LastIndex(v.file, ".") + i := strings.LastIndex(file, ".") + // For old style naming if 2nd and 3rd character of file extension is not a digit replace + // with "00" and ignore any trailing characters. + if len(file) < i+4 || file[i+2] < '0' || file[i+2] > '9' || file[i+3] < '0' || file[i+3] > '9' { + file = file[:i+2] + "00" + return file + } // get file extension - b := []byte(v.file[i+1:]) + b := []byte(file[i+1:]) // start incrementing volume number digits from rightmost for j := 2; j >= 0; j-- { if b[j] != '9' { @@ -222,7 +209,54 @@ func (v *volume) nextVolName() { b[j] = '0' } } - v.file = v.file[:i+1] + string(b) + file = file[:i+1] + string(b) + return file +} + +// openNextFile opens the next volume file in the archive. +func (v *volume) openNextFile() error { + file := v.file + if v.num == 0 { + // check file extensions + i := strings.LastIndex(file, ".") + if i < 0 { + // no file extension, add one + file += ".rar" + } else { + ext := strings.ToLower(file[i+1:]) + // replace with .rar for empty extensions & self extracting archives + if ext == "" || ext == "exe" || ext == "sfx" { + file = file[:i+1] + "rar" + } + } + if a, ok := v.fileBlockReader.(*archive15); ok { + v.old = a.old + } + // new naming scheme must have volume number in filename + if !v.old { + if reDigits.FindStringIndex(file) != nil { + // found digits, try using new naming scheme + err := v.openFile(nextNewVolName(file)) + if err != nil && os.IsNotExist(err) { + // file didn't exist, try old naming scheme + oldErr := v.openFile(nextOldVolName(file)) + if oldErr == nil || !os.IsNotExist(err) { + v.old = true + return oldErr + } + } + return err + } + v.old = true + } + } + // new style volume naming + if !v.old { + file = nextNewVolName(file) + } else { + file = nextOldVolName(file) + } + return v.openFile(file) } func (v *volume) next() (*fileBlockHeader, error) { @@ -241,8 +275,7 @@ func (v *volume) next() (*fileBlockHeader, error) { } v.f.Close() - v.nextVolName() - v.f, err = os.Open(v.dir + v.file) // Open next volume file + err = v.openNextFile() // Open next volume file if err != nil { if atEOF && os.IsNotExist(err) { // volume not found so assume that the archive has ended diff --git a/vendor/github.com/nwaples/rardecode/decode_reader.go b/vendor/github.com/nwaples/rardecode/decode_reader.go index b346936..36699f9 100644 --- a/vendor/github.com/nwaples/rardecode/decode_reader.go +++ b/vendor/github.com/nwaples/rardecode/decode_reader.go @@ -162,9 +162,6 @@ func (d *decodeReader) queueFilter(f *filterBlock) error { if len(d.filters) >= maxQueuedFilters { return errTooManyFilters } - // offset & length must be < window size - f.offset &= d.win.mask - f.length &= d.win.mask // make offset relative to previous filter in list for _, fb := range d.filters { if f.offset < fb.offset { @@ -173,6 +170,9 @@ func (d *decodeReader) queueFilter(f *filterBlock) error { } f.offset -= fb.offset } + // offset & length must be < window size + f.offset &= d.win.mask + f.length &= d.win.mask d.filters = append(d.filters, f) return nil } diff --git a/vendor/github.com/nwaples/rardecode/ppm_model.go b/vendor/github.com/nwaples/rardecode/ppm_model.go index 58a545a..fd55a74 100644 --- a/vendor/github.com/nwaples/rardecode/ppm_model.go +++ b/vendor/github.com/nwaples/rardecode/ppm_model.go @@ -193,112 +193,26 @@ func (s *state) setUint16(n uint16) { s.sym = byte(n); s.freq = byte(n >> 8) } // If there is only one state, the second state contains that state. // If there is more than one state, the second state contains the summFreq // and the index to the slice of states. -type context struct { - i int32 // index into the states array for context - s []state // slice of two states representing context - a *subAllocator -} - -// succPtr returns a pointer value for the context to be stored in a state.succ -func (c *context) succPtr() int32 { return c.i } - -func (c *context) numStates() int { return int(c.s[0].uint16()) } - -func (c *context) setNumStates(n int) { c.s[0].setUint16(uint16(n)) } - -func (c *context) statesIndex() int32 { return c.s[1].succ } - -func (c *context) setStatesIndex(n int32) { c.s[1].succ = n } - -func (c *context) suffix() *context { return c.a.succContext(c.s[0].succ) } - -func (c *context) setSuffix(sc *context) { c.s[0].succ = sc.i } - -func (c *context) summFreq() uint16 { return c.s[1].uint16() } - -func (c *context) setSummFreq(f uint16) { c.s[1].setUint16(f) } - -func (c *context) notEq(ctx *context) bool { return c.i != ctx.i } - -func (c *context) states() []state { - if ns := int32(c.s[0].uint16()); ns != 1 { - i := c.s[1].succ - return c.a.states[i : i+ns] - } - return c.s[1:] -} - -// shrinkStates shrinks the state list down to size states -func (c *context) shrinkStates(states []state, size int) []state { - i1 := units2Index[(len(states)+1)>>1] - i2 := units2Index[(size+1)>>1] +// The context is represented by the index into the states array for these two states. +type context int32 - if size == 1 { - // store state in context, and free states block - n := c.statesIndex() - c.s[1] = states[0] - states = c.s[1:] - c.a.addFreeBlock(n, i1) - } else if i1 != i2 { - if n := c.a.removeFreeBlock(i2); n > 0 { - // allocate new block and copy - copy(c.a.states[n:], states[:size]) - states = c.a.states[n:] - // free old block - c.a.addFreeBlock(c.statesIndex(), i1) - c.setStatesIndex(n) - } else { - // split current block, and free units not needed - n = c.statesIndex() + index2Units[i2]<<1 - u := index2Units[i1] - index2Units[i2] - c.a.freeUnits(n, u) - } - } - c.setNumStates(size) - return states[:size] -} - -// expandStates expands the states list by one -func (c *context) expandStates() []state { - states := c.states() - ns := len(states) - if ns == 1 { - s := states[0] - n := c.a.allocUnits(1) - if n == 0 { - return nil - } - c.setStatesIndex(n) - states = c.a.states[n:] - states[0] = s - } else if ns&0x1 == 0 { - u := ns >> 1 - i1 := units2Index[u] - i2 := units2Index[u+1] - if i1 != i2 { - n := c.a.allocUnits(i2) - if n == 0 { - return nil - } - copy(c.a.states[n:], states) - c.a.addFreeBlock(c.statesIndex(), i1) - c.setStatesIndex(n) - states = c.a.states[n:] - } +// succContext returns a context given a state.succ index +func succContext(i int32) context { + if i <= 0 { + return 0 } - c.setNumStates(ns + 1) - return states[:ns+1] + return context(i) } type subAllocator struct { // memory for allocation is split into two heaps + glueCount int heap1MaxBytes int32 // maximum bytes available in heap1 heap1Lo int32 // heap1 bottom in number of bytes heap1Hi int32 // heap1 top in number of bytes heap2Lo int32 // heap2 bottom index in states heap2Hi int32 // heap2 top index in states - glueCount int // Each freeList entry contains an index into states for the beginning // of a free block. The first state in that block may contain an index @@ -340,9 +254,6 @@ func (a *subAllocator) restart() { for i := range a.freeList { a.freeList[i] = 0 } - for i := range a.states { - a.states[i] = state{} - } } // pushByte puts a byte on the heap and returns a state.succ index that @@ -389,17 +300,6 @@ func (a *subAllocator) succByte(i int32) byte { } } -// succContext returns a context given a state.succ index -func (a *subAllocator) succContext(i int32) *context { - if i <= 0 { - return nil - } - return &context{i: i, s: a.states[i : i+2 : i+2], a: a} -} - -// succIsNil returns whether a state.succ points to nothing -func (a *subAllocator) succIsNil(i int32) bool { return i == 0 } - // nextByteAddr takes a state.succ value representing a pointer // to a byte, and returns the next bytes address func (a *subAllocator) nextByteAddr(n int32) int32 { return n - 1 } @@ -521,7 +421,7 @@ func (a *subAllocator) allocUnits(i byte) int32 { return a.allocUnitsRare(i) } -func (a *subAllocator) newContext(s state, suffix *context) *context { +func (a *subAllocator) newContext(s state, suffix context) context { var n int32 if a.heap2Lo < a.heap2Hi { // allocate from top of heap2 @@ -529,28 +429,121 @@ func (a *subAllocator) newContext(s state, suffix *context) *context { n = a.heap2Hi } else if n = a.removeFreeBlock(1); n == 0 { if n = a.allocUnitsRare(1); n == 0 { - return nil + return 0 } } - c := &context{i: n, s: a.states[n : n+2 : n+2], a: a} - c.s[0] = state{} - c.setNumStates(1) - c.s[1] = s - if suffix != nil { - c.setSuffix(suffix) - } - return c + // we don't need to set numStates to 1 as the default value of 0 in the sym + // field is always incremented by 1 to get numStates. + a.states[n] = state{succ: int32(suffix)} + a.states[n+1] = s + return context(n) } -func (a *subAllocator) newContextSize(ns int) *context { - c := a.newContext(state{}, nil) - c.setNumStates(ns) +func (a *subAllocator) newContextSize(ns int) context { + c := a.newContext(state{}, context(0)) + a.contextSetNumStates(c, ns) i := units2Index[(ns+1)>>1] n := a.allocUnits(i) - c.setStatesIndex(n) + a.contextSetStatesIndex(c, n) return c } +// since number of states is always > 0 && <= 256, we can fit it in a single byte +func (a *subAllocator) contextNumStates(c context) int { return int(a.states[c].sym) + 1 } +func (a *subAllocator) contextSetNumStates(c context, n int) { a.states[c].sym = byte(n - 1) } + +func (a *subAllocator) contextSummFreq(c context) uint16 { return a.states[c+1].uint16() } +func (a *subAllocator) contextSetSummFreq(c context, n uint16) { a.states[c+1].setUint16(n) } +func (a *subAllocator) contextIncSummFreq(c context, n uint16) { + a.states[c+1].setUint16(a.states[c+1].uint16() + n) +} + +func (a *subAllocator) contextSuffix(c context) context { return succContext(a.states[c].succ) } + +func (a *subAllocator) contextStatesIndex(c context) int32 { return a.states[c+1].succ } +func (a *subAllocator) contextSetStatesIndex(c context, n int32) { a.states[c+1].succ = n } + +func (a *subAllocator) contextStates(c context) []state { + if ns := int32(a.states[c].sym) + 1; ns != 1 { + i := a.states[c+1].succ + return a.states[i : i+ns] + } + return a.states[c+1 : c+2] +} + +// shrinkStates shrinks the state list down to size states +func (a *subAllocator) shrinkStates(c context, states []state, size int) []state { + i1 := units2Index[(len(states)+1)>>1] + i2 := units2Index[(size+1)>>1] + + if size == 1 { + // store state in context, and free states block + n := a.contextStatesIndex(c) + a.states[c+1] = states[0] + states = a.states[c+1:] + a.addFreeBlock(n, i1) + } else if i1 != i2 { + if n := a.removeFreeBlock(i2); n > 0 { + // allocate new block and copy + copy(a.states[n:], states[:size]) + states = a.states[n:] + // free old block + a.addFreeBlock(a.contextStatesIndex(c), i1) + a.contextSetStatesIndex(c, n) + } else { + // split current block, and free units not needed + n = a.contextStatesIndex(c) + index2Units[i2]<<1 + u := index2Units[i1] - index2Units[i2] + a.freeUnits(n, u) + } + } + a.contextSetNumStates(c, size) + return states[:size] +} + +// expandStates expands the states list by one +func (a *subAllocator) expandStates(c context) []state { + states := a.contextStates(c) + ns := len(states) + if ns == 1 { + s := states[0] + n := a.allocUnits(1) + if n == 0 { + return nil + } + a.contextSetStatesIndex(c, n) + states = a.states[n:] + states[0] = s + } else if ns&0x1 == 0 { + u := ns >> 1 + i1 := units2Index[u] + i2 := units2Index[u+1] + if i1 != i2 { + n := a.allocUnits(i2) + if n == 0 { + return nil + } + copy(a.states[n:], states) + a.addFreeBlock(a.contextStatesIndex(c), i1) + a.contextSetStatesIndex(c, n) + states = a.states[n:] + } + } + a.contextSetNumStates(c, ns+1) + return states[:ns+1] +} + +func (a *subAllocator) findState(c context, sym byte) *state { + var i int + states := a.contextStates(c) + for i = range states { + if states[i].sym == sym { + break + } + } + return &states[i] +} + type model struct { maxOrder int orderFall int @@ -560,13 +553,14 @@ type model struct { escCount byte prevSym byte initEsc byte - minC *context - maxC *context + c context rc rangeCoder a subAllocator charMask [256]byte binSumm [128][64]uint16 see2Cont [25][16]see2Context + ibuf [256]int + sbuf [256]*state } func (m *model) restart() { @@ -586,15 +580,12 @@ func (m *model) restart() { m.a.restart() - c := m.a.newContextSize(256) - c.setSummFreq(257) - states := c.states() + m.c = m.a.newContextSize(256) + m.a.contextSetSummFreq(m.c, 257) + states := m.a.contextStates(m.c) for i := range states { states[i] = state{sym: byte(i), freq: 1} } - m.minC = c - m.maxC = c - m.prevSym = 0 for i := range m.binSumm { for j, esc := range initBinEsc { @@ -619,9 +610,6 @@ func (m *model) init(br io.ByteReader, reset bool, maxOrder, maxMB int) error { return err } if !reset { - if m.minC == nil { - return errCorruptPPM - } return nil } @@ -631,21 +619,21 @@ func (m *model) init(br io.ByteReader, reset bool, maxOrder, maxMB int) error { return errCorruptPPM } m.maxOrder = maxOrder - m.restart() + m.prevSym = 0 + m.c = 0 return nil } -func (m *model) rescale(s *state) *state { +func (m *model) rescale(c context, s *state) *state { if s.freq <= maxFreq { return s } - c := m.minC var summFreq uint16 s.freq += 4 - states := c.states() - escFreq := c.summFreq() + 4 + states := m.a.contextStates(c) + escFreq := m.a.contextSummFreq(c) + 4 for i := range states { f := states[i].freq @@ -675,7 +663,7 @@ func (m *model) rescale(s *state) *state { escFreq++ } if i != len(states)-1 { - states = c.shrinkStates(states, i+1) + states = m.a.shrinkStates(c, states, i+1) } s = &states[0] if i == 0 { @@ -688,15 +676,14 @@ func (m *model) rescale(s *state) *state { } } summFreq += escFreq - (escFreq >> 1) - c.setSummFreq(summFreq) + m.a.contextSetSummFreq(c, summFreq) return s } -func (m *model) decodeBinSymbol() (*state, error) { - c := m.minC - s := &c.states()[0] +func (m *model) decodeBinSymbol(c context) (*state, error) { + s := &m.a.contextStates(c)[0] - ns := c.suffix().numStates() + ns := m.a.contextNumStates(m.a.contextSuffix(c)) i := m.prevSuccess + ns2BSIndex[ns-1] + byte(m.runLength>>26)&0x20 if m.prevSym >= 64 { i += 8 @@ -725,10 +712,9 @@ func (m *model) decodeBinSymbol() (*state, error) { return nil, err } -func (m *model) decodeSymbol1() (*state, error) { - c := m.minC - states := c.states() - scale := uint32(c.summFreq()) +func (m *model) decodeSymbol1(c context) (*state, error) { + states := m.a.contextStates(c) + scale := uint32(m.a.contextSummFreq(c)) // protect against divide by zero // TODO: look at why this happens, may be problem elsewhere if scale == 0 { @@ -746,7 +732,7 @@ func (m *model) decodeSymbol1() (*state, error) { } err := m.rc.decode(n-uint32(s.freq), n) s.freq += 4 - c.setSummFreq(uint16(scale + 4)) + m.a.contextSetSummFreq(c, uint16(scale+4)) if i == 0 { if 2*n > scale { m.prevSuccess = 1 @@ -759,7 +745,7 @@ func (m *model) decodeSymbol1() (*state, error) { states[i-1], states[i] = states[i], states[i-1] s = &states[i-1] } - return m.rescale(s), err + return m.rescale(c, s), err } for _, s := range states { @@ -768,8 +754,8 @@ func (m *model) decodeSymbol1() (*state, error) { return nil, m.rc.decode(n, scale) } -func (m *model) makeEscFreq(c *context, numMasked int) *see2Context { - ns := c.numStates() +func (m *model) makeEscFreq(c context, numMasked int) *see2Context { + ns := m.a.contextNumStates(c) if ns == 256 { return nil } @@ -779,10 +765,10 @@ func (m *model) makeEscFreq(c *context, numMasked int) *see2Context { if m.prevSym >= 64 { i = 8 } - if diff < c.suffix().numStates()-ns { + if diff < m.a.contextNumStates(m.a.contextSuffix(c))-ns { i++ } - if int(c.summFreq()) < 11*ns { + if int(m.a.contextSummFreq(c)) < 11*ns { i += 2 } if numMasked > diff { @@ -791,22 +777,21 @@ func (m *model) makeEscFreq(c *context, numMasked int) *see2Context { return &m.see2Cont[ns2Index[diff-1]][i] } -func (m *model) decodeSymbol2(numMasked int) (*state, error) { - c := m.minC - +func (m *model) decodeSymbol2(c context, numMasked int) (*state, error) { see := m.makeEscFreq(c, numMasked) scale := see.mean() var i int var hi uint32 - states := c.states() - sl := make([]*state, len(states)-numMasked) + states := m.a.contextStates(c) + n := len(states) - numMasked + sl := m.ibuf[:n] for j := range sl { for m.charMask[states[i].sym] == m.escCount { i++ } hi += uint32(states[i].freq) - sl[j] = &states[i] + sl[j] = i i++ } @@ -821,18 +806,19 @@ func (m *model) decodeSymbol2(numMasked int) (*state, error) { if see != nil { see.summ += uint16(scale) } - for _, s := range sl { - m.charMask[s.sym] = m.escCount + for _, i := range sl { + m.charMask[states[i].sym] = m.escCount } return nil, err } - hi = uint32(sl[0].freq) + hi = uint32(states[sl[0]].freq) + n = 0 for hi <= count { - sl = sl[1:] - hi += uint32(sl[0].freq) + n++ + hi += uint32(states[sl[n]].freq) } - s := sl[0] + s := &states[sl[n]] err := m.rc.decode(hi-uint32(s.freq), hi) @@ -842,37 +828,25 @@ func (m *model) decodeSymbol2(numMasked int) (*state, error) { m.runLength = m.initRL s.freq += 4 - c.setSummFreq(c.summFreq() + 4) - return m.rescale(s), err + m.a.contextIncSummFreq(c, 4) + return m.rescale(c, s), err } -func (c *context) findState(sym byte) *state { - var i int - states := c.states() - for i = range states { - if states[i].sym == sym { - break - } - } - return &states[i] -} - -func (m *model) createSuccessors(s, ss *state) *context { - var sl []*state +func (m *model) createSuccessors(c context, s, ss *state) context { + sl := m.sbuf[:0] if m.orderFall != 0 { sl = append(sl, s) } - c := m.minC - for suff := c.suffix(); suff != nil; suff = c.suffix() { + for suff := m.a.contextSuffix(c); suff > 0; suff = m.a.contextSuffix(c) { c = suff if ss == nil { - ss = c.findState(s.sym) + ss = m.a.findState(c, s.sym) } if ss.succ != s.succ { - c = m.a.succContext(ss.succ) + c = succContext(ss.succ) break } sl = append(sl, ss) @@ -887,12 +861,12 @@ func (m *model) createSuccessors(s, ss *state) *context { up.sym = m.a.succByte(s.succ) up.succ = m.a.nextByteAddr(s.succ) - states := c.states() + states := m.a.contextStates(c) if len(states) > 1 { - s = c.findState(up.sym) + s = m.a.findState(c, up.sym) cf := uint16(s.freq) - 1 - s0 := c.summFreq() - uint16(len(states)) - cf + s0 := m.a.contextSummFreq(c) - uint16(len(states)) - cf if 2*cf <= s0 { if 5*cf > s0 { @@ -909,20 +883,18 @@ func (m *model) createSuccessors(s, ss *state) *context { for i := len(sl) - 1; i >= 0; i-- { c = m.a.newContext(up, c) - if c == nil { - return nil + if c == 0 { + return c } - sl[i].succ = c.succPtr() + sl[i].succ = int32(c) } return c } -func (m *model) update(s *state) { +func (m *model) update(minC, maxC context, s *state) context { if m.orderFall == 0 { - if c := m.a.succContext(s.succ); c != nil { - m.minC = c - m.maxC = c - return + if s.succ > 0 { + return context(s.succ) } } @@ -935,9 +907,9 @@ func (m *model) update(s *state) { var ss *state // matching minC.suffix state - if s.freq < maxFreq/4 && m.minC.suffix() != nil { - c := m.minC.suffix() - states := c.states() + if s.freq < maxFreq/4 && m.a.contextSuffix(minC) > 0 { + c := m.a.contextSuffix(minC) + states := m.a.contextStates(c) var i int if len(states) > 1 { @@ -950,7 +922,7 @@ func (m *model) update(s *state) { } if states[i].freq < maxFreq-9 { states[i].freq += 2 - c.setSummFreq(c.summFreq() + 2) + m.a.contextIncSummFreq(c, 2) } } else if states[0].freq < 32 { states[0].freq++ @@ -959,57 +931,49 @@ func (m *model) update(s *state) { } if m.orderFall == 0 { - c := m.createSuccessors(s, ss) - if c == nil { - m.restart() - } else { - m.minC = c - m.maxC = c - s.succ = c.succPtr() - } - return + minC = m.createSuccessors(minC, s, ss) + s.succ = int32(minC) + return minC } succ := m.a.pushByte(s.sym) - if m.a.succIsNil(succ) { - m.restart() - return + if succ == 0 { + return context(0) } - var minC *context - if m.a.succIsNil(s.succ) { + var newC context + if s.succ == 0 { s.succ = succ - minC = m.minC + newC = minC } else { - minC = m.a.succContext(s.succ) - if minC == nil { - minC = m.createSuccessors(s, ss) - if minC == nil { - m.restart() - return + if s.succ > 0 { + newC = context(s.succ) + } else { + newC = m.createSuccessors(minC, s, ss) + if newC == 0 { + return context(0) } } m.orderFall-- if m.orderFall == 0 { - succ = minC.succPtr() - if m.maxC.notEq(m.minC) { + succ = int32(newC) + if maxC != minC { m.a.popByte() } } } - n := m.minC.numStates() - s0 := int(m.minC.summFreq()) - n - int(s.freq-1) - for c := m.maxC; c.notEq(m.minC); c = c.suffix() { + n := m.a.contextNumStates(minC) + s0 := int(m.a.contextSummFreq(minC)) - n - int(s.freq-1) + for c := maxC; c != minC; c = m.a.contextSuffix(c) { var summFreq uint16 - states := c.expandStates() + states := m.a.expandStates(c) if states == nil { - m.restart() - return + return context(0) } if ns := len(states) - 1; ns != 1 { - summFreq = c.summFreq() + summFreq = m.a.contextSummFreq(c) if 4*ns <= n && int(summFreq) <= 8*ns { summFreq += 2 } @@ -1056,41 +1020,40 @@ func (m *model) update(s *state) { summFreq += 3 } states[len(states)-1] = state{sym: s.sym, freq: freq, succ: succ} - c.setSummFreq(summFreq) + m.a.contextSetSummFreq(c, summFreq) } - m.minC = minC - m.maxC = minC + return newC } func (m *model) ReadByte() (byte, error) { - if m.minC == nil { - return 0, errCorruptPPM + if m.c == 0 { + m.restart() } + minC := m.c + maxC := minC var s *state var err error - if m.minC.numStates() == 1 { - s, err = m.decodeBinSymbol() + if m.a.contextNumStates(minC) == 1 { + s, err = m.decodeBinSymbol(minC) } else { - s, err = m.decodeSymbol1() + s, err = m.decodeSymbol1(minC) } for s == nil && err == nil { - n := m.minC.numStates() - for m.minC.numStates() == n { + n := m.a.contextNumStates(minC) + for m.a.contextNumStates(minC) == n { m.orderFall++ - m.minC = m.minC.suffix() - if m.minC == nil { + minC = m.a.contextSuffix(minC) + if minC <= 0 { return 0, errCorruptPPM } } - s, err = m.decodeSymbol2(n) + s, err = m.decodeSymbol2(minC, n) } if err != nil { return 0, err } - // save sym so it doesn't get overwritten by a possible restart() - sym := s.sym - m.update(s) - m.prevSym = sym - return sym, nil + m.c = m.update(minC, maxC, s) + m.prevSym = s.sym + return s.sym, nil } diff --git a/vendor/github.com/pierrec/lz4/lz4.go b/vendor/github.com/pierrec/lz4/lz4.go index 6c73539..a3284bd 100644 --- a/vendor/github.com/pierrec/lz4/lz4.go +++ b/vendor/github.com/pierrec/lz4/lz4.go @@ -10,9 +10,10 @@ // package lz4 -import "math/bits" - -import "sync" +import ( + "math/bits" + "sync" +) const ( // Extension is the LZ4 frame file name extension @@ -20,8 +21,9 @@ const ( // Version is the LZ4 frame format version Version = 1 - frameMagic uint32 = 0x184D2204 - frameSkipMagic uint32 = 0x184D2A50 + frameMagic uint32 = 0x184D2204 + frameSkipMagic uint32 = 0x184D2A50 + frameMagicLegacy uint32 = 0x184C2102 // The following constants are used to setup the compression algorithm. minMatch = 4 // the minimum size of the match sequence size (4 bytes) @@ -108,6 +110,7 @@ type Header struct { done bool // Header processed flag (Read or Write and checked). } +// Reset reset internal status func (h *Header) Reset() { h.done = false } diff --git a/vendor/github.com/pierrec/lz4/reader_legacy.go b/vendor/github.com/pierrec/lz4/reader_legacy.go new file mode 100644 index 0000000..1670a77 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/reader_legacy.go @@ -0,0 +1,207 @@ +package lz4 + +import ( + "encoding/binary" + "fmt" + "io" +) + +// ReaderLegacy implements the LZ4Demo frame decoder. +// The Header is set after the first call to Read(). +type ReaderLegacy struct { + Header + // Handler called when a block has been successfully read. + // It provides the number of bytes read. + OnBlockDone func(size int) + + lastBlock bool + buf [8]byte // Scrap buffer. + pos int64 // Current position in src. + src io.Reader // Source. + zdata []byte // Compressed data. + data []byte // Uncompressed data. + idx int // Index of unread bytes into data. + skip int64 // Bytes to skip before next read. + dpos int64 // Position in dest +} + +// NewReaderLegacy returns a new LZ4Demo frame decoder. +// No access to the underlying io.Reader is performed. +func NewReaderLegacy(src io.Reader) *ReaderLegacy { + r := &ReaderLegacy{src: src} + return r +} + +// readHeader checks the frame magic number and parses the frame descriptoz. +// Skippable frames are supported even as a first frame although the LZ4 +// specifications recommends skippable frames not to be used as first frames. +func (z *ReaderLegacy) readLegacyHeader() error { + z.lastBlock = false + magic, err := z.readUint32() + if err != nil { + z.pos += 4 + if err == io.ErrUnexpectedEOF { + return io.EOF + } + return err + } + if magic != frameMagicLegacy { + return ErrInvalid + } + z.pos += 4 + + // Legacy has fixed 8MB blocksizes + // https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame + bSize := blockSize4M * 2 + + // Allocate the compressed/uncompressed buffers. + // The compressed buffer cannot exceed the uncompressed one. + if n := 2 * bSize; cap(z.zdata) < n { + z.zdata = make([]byte, n, n) + } + if debugFlag { + debug("header block max size size=%d", bSize) + } + z.zdata = z.zdata[:bSize] + z.data = z.zdata[:cap(z.zdata)][bSize:] + z.idx = len(z.data) + + z.Header.done = true + if debugFlag { + debug("header read: %v", z.Header) + } + + return nil +} + +// Read decompresses data from the underlying source into the supplied buffer. +// +// Since there can be multiple streams concatenated, Header values may +// change between calls to Read(). If that is the case, no data is actually read from +// the underlying io.Reader, to allow for potential input buffer resizing. +func (z *ReaderLegacy) Read(buf []byte) (int, error) { + if debugFlag { + debug("Read buf len=%d", len(buf)) + } + if !z.Header.done { + if err := z.readLegacyHeader(); err != nil { + return 0, err + } + if debugFlag { + debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d", + len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx) + } + } + + if len(buf) == 0 { + return 0, nil + } + + if z.idx == len(z.data) { + // No data ready for reading, process the next block. + if debugFlag { + debug(" reading block from writer %d %d", z.idx, blockSize4M*2) + } + + // Reset uncompressed buffer + z.data = z.zdata[:cap(z.zdata)][len(z.zdata):] + + bLen, err := z.readUint32() + if err != nil { + return 0, err + } + if debugFlag { + debug(" bLen %d (0x%x) offset = %d (0x%x)", bLen, bLen, z.pos, z.pos) + } + z.pos += 4 + + // Legacy blocks are always compressed, even when detrimental + if debugFlag { + debug(" compressed block size %d", bLen) + } + + if int(bLen) > cap(z.data) { + return 0, fmt.Errorf("lz4: invalid block size: %d", bLen) + } + zdata := z.zdata[:bLen] + if _, err := io.ReadFull(z.src, zdata); err != nil { + return 0, err + } + z.pos += int64(bLen) + + n, err := UncompressBlock(zdata, z.data) + if err != nil { + return 0, err + } + + z.data = z.data[:n] + if z.OnBlockDone != nil { + z.OnBlockDone(n) + } + + z.idx = 0 + + // Legacy blocks are fixed to 8MB, if we read a decompressed block smaller than this + // it means we've reached the end... + if n < blockSize4M*2 { + z.lastBlock = true + } + } + + if z.skip > int64(len(z.data[z.idx:])) { + z.skip -= int64(len(z.data[z.idx:])) + z.dpos += int64(len(z.data[z.idx:])) + z.idx = len(z.data) + return 0, nil + } + + z.idx += int(z.skip) + z.dpos += z.skip + z.skip = 0 + + n := copy(buf, z.data[z.idx:]) + z.idx += n + z.dpos += int64(n) + if debugFlag { + debug("%v] copied %d bytes to input (%d:%d)", z.lastBlock, n, z.idx, len(z.data)) + } + if z.lastBlock && len(z.data) == z.idx { + return n, io.EOF + } + return n, nil +} + +// Seek implements io.Seeker, but supports seeking forward from the current +// position only. Any other seek will return an error. Allows skipping output +// bytes which aren't needed, which in some scenarios is faster than reading +// and discarding them. +// Note this may cause future calls to Read() to read 0 bytes if all of the +// data they would have returned is skipped. +func (z *ReaderLegacy) Seek(offset int64, whence int) (int64, error) { + if offset < 0 || whence != io.SeekCurrent { + return z.dpos + z.skip, ErrUnsupportedSeek + } + z.skip += offset + return z.dpos + z.skip, nil +} + +// Reset discards the Reader's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *ReaderLegacy) Reset(r io.Reader) { + z.Header = Header{} + z.pos = 0 + z.src = r + z.zdata = z.zdata[:0] + z.data = z.data[:0] + z.idx = 0 +} + +// readUint32 reads an uint32 into the supplied buffer. +// The idea is to make use of the already allocated buffers avoiding additional allocations. +func (z *ReaderLegacy) readUint32() (uint32, error) { + buf := z.buf[:4] + _, err := io.ReadFull(z.src, buf) + x := binary.LittleEndian.Uint32(buf) + return x, err +} diff --git a/vendor/github.com/pierrec/lz4/writer.go b/vendor/github.com/pierrec/lz4/writer.go index 324f138..f066d56 100644 --- a/vendor/github.com/pierrec/lz4/writer.go +++ b/vendor/github.com/pierrec/lz4/writer.go @@ -3,9 +3,10 @@ package lz4 import ( "encoding/binary" "fmt" - "github.com/pierrec/lz4/internal/xxh32" "io" "runtime" + + "github.com/pierrec/lz4/internal/xxh32" ) // zResult contains the results of compressing a block. @@ -83,10 +84,8 @@ func (z *Writer) WithConcurrency(n int) *Writer { z.err = err } } - if isCompressed := res.size&compressedBlockFlag == 0; isCompressed { - // It is now safe to release the buffer as no longer in use by any goroutine. - putBuffer(cap(res.data), res.data) - } + // It is now safe to release the buffer as no longer in use by any goroutine. + putBuffer(cap(res.data), res.data) if h := z.OnBlockDone; h != nil { h(n) } @@ -230,7 +229,12 @@ func (z *Writer) compressBlock(data []byte) error { if z.c != nil { c := make(chan zResult) z.c <- c // Send now to guarantee order - go writerCompressBlock(c, z.Header, data) + + // get a buffer from the pool and copy the data over + block := getBuffer(z.Header.BlockMaxSize)[:len(data)] + copy(block, data) + + go writerCompressBlock(c, z.Header, block) return nil } @@ -298,7 +302,9 @@ func (z *Writer) Flush() error { return nil } - data := z.data[:z.idx] + data := getBuffer(z.Header.BlockMaxSize)[:len(z.data[:z.idx])] + copy(data, z.data[:z.idx]) + z.idx = 0 if z.c == nil { return z.compressBlock(data) @@ -370,6 +376,10 @@ func (z *Writer) Reset(w io.Writer) { z.checksum.Reset() z.idx = 0 z.err = nil + // reset hashtable to ensure deterministic output. + for i := range z.hashtable { + z.hashtable[i] = 0 + } z.WithConcurrency(n) } @@ -397,9 +407,13 @@ func writerCompressBlock(c chan zResult, header Header, data []byte) { if zn > 0 && zn < len(data) { res.size = uint32(zn) res.data = zdata[:zn] + // release the uncompressed block since it is not used anymore + putBuffer(header.BlockMaxSize, data) } else { res.size = uint32(len(data)) | compressedBlockFlag res.data = data + // release the compressed block since it was not used + putBuffer(header.BlockMaxSize, zdata) } if header.BlockChecksum { res.checksum = xxh32.ChecksumZero(res.data) diff --git a/vendor/github.com/pierrec/lz4/writer_legacy.go b/vendor/github.com/pierrec/lz4/writer_legacy.go new file mode 100644 index 0000000..ca8dc8c --- /dev/null +++ b/vendor/github.com/pierrec/lz4/writer_legacy.go @@ -0,0 +1,182 @@ +package lz4 + +import ( + "encoding/binary" + "io" +) + +// WriterLegacy implements the LZ4Demo frame decoder. +type WriterLegacy struct { + Header + // Handler called when a block has been successfully read. + // It provides the number of bytes read. + OnBlockDone func(size int) + + dst io.Writer // Destination. + data []byte // Data to be compressed + buffer for compressed data. + idx int // Index into data. + hashtable [winSize]int // Hash table used in CompressBlock(). +} + +// NewWriterLegacy returns a new LZ4 encoder for the legacy frame format. +// No access to the underlying io.Writer is performed. +// The supplied Header is checked at the first Write. +// It is ok to change it before the first Write but then not until a Reset() is performed. +func NewWriterLegacy(dst io.Writer) *WriterLegacy { + z := new(WriterLegacy) + z.Reset(dst) + return z +} + +// Write compresses data from the supplied buffer into the underlying io.Writer. +// Write does not return until the data has been written. +func (z *WriterLegacy) Write(buf []byte) (int, error) { + if !z.Header.done { + if err := z.writeHeader(); err != nil { + return 0, err + } + } + if debugFlag { + debug("input buffer len=%d index=%d", len(buf), z.idx) + } + + zn := len(z.data) + var n int + for len(buf) > 0 { + if z.idx == 0 && len(buf) >= zn { + // Avoid a copy as there is enough data for a block. + if err := z.compressBlock(buf[:zn]); err != nil { + return n, err + } + n += zn + buf = buf[zn:] + continue + } + // Accumulate the data to be compressed. + m := copy(z.data[z.idx:], buf) + n += m + z.idx += m + buf = buf[m:] + if debugFlag { + debug("%d bytes copied to buf, current index %d", n, z.idx) + } + + if z.idx < len(z.data) { + // Buffer not filled. + if debugFlag { + debug("need more data for compression") + } + return n, nil + } + + // Buffer full. + if err := z.compressBlock(z.data); err != nil { + return n, err + } + z.idx = 0 + } + + return n, nil +} + +// writeHeader builds and writes the header to the underlying io.Writer. +func (z *WriterLegacy) writeHeader() error { + // Legacy has fixed 8MB blocksizes + // https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame + bSize := 2 * blockSize4M + + buf := make([]byte, 2*bSize, 2*bSize) + z.data = buf[:bSize] // Uncompressed buffer is the first half. + + z.idx = 0 + + // Header consists of one mageic number, write it out. + if err := binary.Write(z.dst, binary.LittleEndian, frameMagicLegacy); err != nil { + return err + } + z.Header.done = true + if debugFlag { + debug("wrote header %v", z.Header) + } + + return nil +} + +// compressBlock compresses a block. +func (z *WriterLegacy) compressBlock(data []byte) error { + bSize := 2 * blockSize4M + zdata := z.data[bSize:cap(z.data)] + // The compressed block size cannot exceed the input's. + var zn int + + if level := z.Header.CompressionLevel; level != 0 { + zn, _ = CompressBlockHC(data, zdata, level) + } else { + zn, _ = CompressBlock(data, zdata, z.hashtable[:]) + } + + if debugFlag { + debug("block compression %d => %d", len(data), zn) + } + zdata = zdata[:zn] + + // Write the block. + if err := binary.Write(z.dst, binary.LittleEndian, uint32(zn)); err != nil { + return err + } + written, err := z.dst.Write(zdata) + if err != nil { + return err + } + if h := z.OnBlockDone; h != nil { + h(written) + } + return nil +} + +// Flush flushes any pending compressed data to the underlying writer. +// Flush does not return until the data has been written. +// If the underlying writer returns an error, Flush returns that error. +func (z *WriterLegacy) Flush() error { + if debugFlag { + debug("flush with index %d", z.idx) + } + if z.idx == 0 { + return nil + } + + data := z.data[:z.idx] + z.idx = 0 + return z.compressBlock(data) +} + +// Close closes the WriterLegacy, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer. +func (z *WriterLegacy) Close() error { + if !z.Header.done { + if err := z.writeHeader(); err != nil { + return err + } + } + if err := z.Flush(); err != nil { + return err + } + + if debugFlag { + debug("writing last empty block") + } + + return nil +} + +// Reset clears the state of the WriterLegacy z such that it is equivalent to its +// initial state from NewWriterLegacy, but instead writing to w. +// No access to the underlying io.Writer is performed. +func (z *WriterLegacy) Reset(w io.Writer) { + z.Header.Reset() + z.dst = w + z.idx = 0 + // reset hashtable to ensure deterministic output. + for i := range z.hashtable { + z.hashtable[i] = 0 + } +} diff --git a/vendor/github.com/stretchr/testify/assert/assertion_compare.go b/vendor/github.com/stretchr/testify/assert/assertion_compare.go index dc20039..41649d2 100644 --- a/vendor/github.com/stretchr/testify/assert/assertion_compare.go +++ b/vendor/github.com/stretchr/testify/assert/assertion_compare.go @@ -13,12 +13,42 @@ const ( compareGreater ) +var ( + intType = reflect.TypeOf(int(1)) + int8Type = reflect.TypeOf(int8(1)) + int16Type = reflect.TypeOf(int16(1)) + int32Type = reflect.TypeOf(int32(1)) + int64Type = reflect.TypeOf(int64(1)) + + uintType = reflect.TypeOf(uint(1)) + uint8Type = reflect.TypeOf(uint8(1)) + uint16Type = reflect.TypeOf(uint16(1)) + uint32Type = reflect.TypeOf(uint32(1)) + uint64Type = reflect.TypeOf(uint64(1)) + + float32Type = reflect.TypeOf(float32(1)) + float64Type = reflect.TypeOf(float64(1)) + + stringType = reflect.TypeOf("") +) + func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { + obj1Value := reflect.ValueOf(obj1) + obj2Value := reflect.ValueOf(obj2) + + // throughout this switch we try and avoid calling .Convert() if possible, + // as this has a pretty big performance impact switch kind { case reflect.Int: { - intobj1 := obj1.(int) - intobj2 := obj2.(int) + intobj1, ok := obj1.(int) + if !ok { + intobj1 = obj1Value.Convert(intType).Interface().(int) + } + intobj2, ok := obj2.(int) + if !ok { + intobj2 = obj2Value.Convert(intType).Interface().(int) + } if intobj1 > intobj2 { return compareGreater, true } @@ -31,8 +61,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Int8: { - int8obj1 := obj1.(int8) - int8obj2 := obj2.(int8) + int8obj1, ok := obj1.(int8) + if !ok { + int8obj1 = obj1Value.Convert(int8Type).Interface().(int8) + } + int8obj2, ok := obj2.(int8) + if !ok { + int8obj2 = obj2Value.Convert(int8Type).Interface().(int8) + } if int8obj1 > int8obj2 { return compareGreater, true } @@ -45,8 +81,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Int16: { - int16obj1 := obj1.(int16) - int16obj2 := obj2.(int16) + int16obj1, ok := obj1.(int16) + if !ok { + int16obj1 = obj1Value.Convert(int16Type).Interface().(int16) + } + int16obj2, ok := obj2.(int16) + if !ok { + int16obj2 = obj2Value.Convert(int16Type).Interface().(int16) + } if int16obj1 > int16obj2 { return compareGreater, true } @@ -59,8 +101,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Int32: { - int32obj1 := obj1.(int32) - int32obj2 := obj2.(int32) + int32obj1, ok := obj1.(int32) + if !ok { + int32obj1 = obj1Value.Convert(int32Type).Interface().(int32) + } + int32obj2, ok := obj2.(int32) + if !ok { + int32obj2 = obj2Value.Convert(int32Type).Interface().(int32) + } if int32obj1 > int32obj2 { return compareGreater, true } @@ -73,8 +121,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Int64: { - int64obj1 := obj1.(int64) - int64obj2 := obj2.(int64) + int64obj1, ok := obj1.(int64) + if !ok { + int64obj1 = obj1Value.Convert(int64Type).Interface().(int64) + } + int64obj2, ok := obj2.(int64) + if !ok { + int64obj2 = obj2Value.Convert(int64Type).Interface().(int64) + } if int64obj1 > int64obj2 { return compareGreater, true } @@ -87,8 +141,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Uint: { - uintobj1 := obj1.(uint) - uintobj2 := obj2.(uint) + uintobj1, ok := obj1.(uint) + if !ok { + uintobj1 = obj1Value.Convert(uintType).Interface().(uint) + } + uintobj2, ok := obj2.(uint) + if !ok { + uintobj2 = obj2Value.Convert(uintType).Interface().(uint) + } if uintobj1 > uintobj2 { return compareGreater, true } @@ -101,8 +161,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Uint8: { - uint8obj1 := obj1.(uint8) - uint8obj2 := obj2.(uint8) + uint8obj1, ok := obj1.(uint8) + if !ok { + uint8obj1 = obj1Value.Convert(uint8Type).Interface().(uint8) + } + uint8obj2, ok := obj2.(uint8) + if !ok { + uint8obj2 = obj2Value.Convert(uint8Type).Interface().(uint8) + } if uint8obj1 > uint8obj2 { return compareGreater, true } @@ -115,8 +181,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Uint16: { - uint16obj1 := obj1.(uint16) - uint16obj2 := obj2.(uint16) + uint16obj1, ok := obj1.(uint16) + if !ok { + uint16obj1 = obj1Value.Convert(uint16Type).Interface().(uint16) + } + uint16obj2, ok := obj2.(uint16) + if !ok { + uint16obj2 = obj2Value.Convert(uint16Type).Interface().(uint16) + } if uint16obj1 > uint16obj2 { return compareGreater, true } @@ -129,8 +201,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Uint32: { - uint32obj1 := obj1.(uint32) - uint32obj2 := obj2.(uint32) + uint32obj1, ok := obj1.(uint32) + if !ok { + uint32obj1 = obj1Value.Convert(uint32Type).Interface().(uint32) + } + uint32obj2, ok := obj2.(uint32) + if !ok { + uint32obj2 = obj2Value.Convert(uint32Type).Interface().(uint32) + } if uint32obj1 > uint32obj2 { return compareGreater, true } @@ -143,8 +221,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Uint64: { - uint64obj1 := obj1.(uint64) - uint64obj2 := obj2.(uint64) + uint64obj1, ok := obj1.(uint64) + if !ok { + uint64obj1 = obj1Value.Convert(uint64Type).Interface().(uint64) + } + uint64obj2, ok := obj2.(uint64) + if !ok { + uint64obj2 = obj2Value.Convert(uint64Type).Interface().(uint64) + } if uint64obj1 > uint64obj2 { return compareGreater, true } @@ -157,8 +241,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Float32: { - float32obj1 := obj1.(float32) - float32obj2 := obj2.(float32) + float32obj1, ok := obj1.(float32) + if !ok { + float32obj1 = obj1Value.Convert(float32Type).Interface().(float32) + } + float32obj2, ok := obj2.(float32) + if !ok { + float32obj2 = obj2Value.Convert(float32Type).Interface().(float32) + } if float32obj1 > float32obj2 { return compareGreater, true } @@ -171,8 +261,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.Float64: { - float64obj1 := obj1.(float64) - float64obj2 := obj2.(float64) + float64obj1, ok := obj1.(float64) + if !ok { + float64obj1 = obj1Value.Convert(float64Type).Interface().(float64) + } + float64obj2, ok := obj2.(float64) + if !ok { + float64obj2 = obj2Value.Convert(float64Type).Interface().(float64) + } if float64obj1 > float64obj2 { return compareGreater, true } @@ -185,8 +281,14 @@ func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { } case reflect.String: { - stringobj1 := obj1.(string) - stringobj2 := obj2.(string) + stringobj1, ok := obj1.(string) + if !ok { + stringobj1 = obj1Value.Convert(stringType).Interface().(string) + } + stringobj2, ok := obj2.(string) + if !ok { + stringobj2 = obj2Value.Convert(stringType).Interface().(string) + } if stringobj1 > stringobj2 { return compareGreater, true } @@ -240,6 +342,24 @@ func LessOrEqual(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...inter return compareTwoValues(t, e1, e2, []CompareType{compareLess, compareEqual}, "\"%v\" is not less than or equal to \"%v\"", msgAndArgs) } +// Positive asserts that the specified element is positive +// +// assert.Positive(t, 1) +// assert.Positive(t, 1.23) +func Positive(t TestingT, e interface{}, msgAndArgs ...interface{}) bool { + zero := reflect.Zero(reflect.TypeOf(e)) + return compareTwoValues(t, e, zero.Interface(), []CompareType{compareGreater}, "\"%v\" is not positive", msgAndArgs) +} + +// Negative asserts that the specified element is negative +// +// assert.Negative(t, -1) +// assert.Negative(t, -1.23) +func Negative(t TestingT, e interface{}, msgAndArgs ...interface{}) bool { + zero := reflect.Zero(reflect.TypeOf(e)) + return compareTwoValues(t, e, zero.Interface(), []CompareType{compareLess}, "\"%v\" is not negative", msgAndArgs) +} + func compareTwoValues(t TestingT, e1 interface{}, e2 interface{}, allowedComparesResults []CompareType, failMessage string, msgAndArgs ...interface{}) bool { if h, ok := t.(tHelper); ok { h.Helper() diff --git a/vendor/github.com/stretchr/testify/assert/assertion_format.go b/vendor/github.com/stretchr/testify/assert/assertion_format.go index 49370eb..4dfd122 100644 --- a/vendor/github.com/stretchr/testify/assert/assertion_format.go +++ b/vendor/github.com/stretchr/testify/assert/assertion_format.go @@ -114,6 +114,24 @@ func Errorf(t TestingT, err error, msg string, args ...interface{}) bool { return Error(t, err, append([]interface{}{msg}, args...)...) } +// ErrorAsf asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func ErrorAsf(t TestingT, err error, target interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return ErrorAs(t, err, target, append([]interface{}{msg}, args...)...) +} + +// ErrorIsf asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func ErrorIsf(t TestingT, err error, target error, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return ErrorIs(t, err, target, append([]interface{}{msg}, args...)...) +} + // Eventuallyf asserts that given condition will be met in waitFor time, // periodically checking target function each tick. // @@ -321,6 +339,54 @@ func InEpsilonSlicef(t TestingT, expected interface{}, actual interface{}, epsil return InEpsilonSlice(t, expected, actual, epsilon, append([]interface{}{msg}, args...)...) } +// IsDecreasingf asserts that the collection is decreasing +// +// assert.IsDecreasingf(t, []int{2, 1, 0}, "error message %s", "formatted") +// assert.IsDecreasingf(t, []float{2, 1}, "error message %s", "formatted") +// assert.IsDecreasingf(t, []string{"b", "a"}, "error message %s", "formatted") +func IsDecreasingf(t TestingT, object interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return IsDecreasing(t, object, append([]interface{}{msg}, args...)...) +} + +// IsIncreasingf asserts that the collection is increasing +// +// assert.IsIncreasingf(t, []int{1, 2, 3}, "error message %s", "formatted") +// assert.IsIncreasingf(t, []float{1, 2}, "error message %s", "formatted") +// assert.IsIncreasingf(t, []string{"a", "b"}, "error message %s", "formatted") +func IsIncreasingf(t TestingT, object interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return IsIncreasing(t, object, append([]interface{}{msg}, args...)...) +} + +// IsNonDecreasingf asserts that the collection is not decreasing +// +// assert.IsNonDecreasingf(t, []int{1, 1, 2}, "error message %s", "formatted") +// assert.IsNonDecreasingf(t, []float{1, 2}, "error message %s", "formatted") +// assert.IsNonDecreasingf(t, []string{"a", "b"}, "error message %s", "formatted") +func IsNonDecreasingf(t TestingT, object interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return IsNonDecreasing(t, object, append([]interface{}{msg}, args...)...) +} + +// IsNonIncreasingf asserts that the collection is not increasing +// +// assert.IsNonIncreasingf(t, []int{2, 1, 1}, "error message %s", "formatted") +// assert.IsNonIncreasingf(t, []float{2, 1}, "error message %s", "formatted") +// assert.IsNonIncreasingf(t, []string{"b", "a"}, "error message %s", "formatted") +func IsNonIncreasingf(t TestingT, object interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return IsNonIncreasing(t, object, append([]interface{}{msg}, args...)...) +} + // IsTypef asserts that the specified objects are of the same type. func IsTypef(t TestingT, expectedType interface{}, object interface{}, msg string, args ...interface{}) bool { if h, ok := t.(tHelper); ok { @@ -375,6 +441,17 @@ func LessOrEqualf(t TestingT, e1 interface{}, e2 interface{}, msg string, args . return LessOrEqual(t, e1, e2, append([]interface{}{msg}, args...)...) } +// Negativef asserts that the specified element is negative +// +// assert.Negativef(t, -1, "error message %s", "formatted") +// assert.Negativef(t, -1.23, "error message %s", "formatted") +func Negativef(t TestingT, e interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Negative(t, e, append([]interface{}{msg}, args...)...) +} + // Neverf asserts that the given condition doesn't satisfy in waitFor time, // periodically checking the target function each tick. // @@ -476,6 +553,15 @@ func NotEqualValuesf(t TestingT, expected interface{}, actual interface{}, msg s return NotEqualValues(t, expected, actual, append([]interface{}{msg}, args...)...) } +// NotErrorIsf asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func NotErrorIsf(t TestingT, err error, target error, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return NotErrorIs(t, err, target, append([]interface{}{msg}, args...)...) +} + // NotNilf asserts that the specified object is not nil. // // assert.NotNilf(t, err, "error message %s", "formatted") @@ -572,6 +658,17 @@ func PanicsWithValuef(t TestingT, expected interface{}, f PanicTestFunc, msg str return PanicsWithValue(t, expected, f, append([]interface{}{msg}, args...)...) } +// Positivef asserts that the specified element is positive +// +// assert.Positivef(t, 1, "error message %s", "formatted") +// assert.Positivef(t, 1.23, "error message %s", "formatted") +func Positivef(t TestingT, e interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Positive(t, e, append([]interface{}{msg}, args...)...) +} + // Regexpf asserts that a specified regexp matches a string. // // assert.Regexpf(t, regexp.MustCompile("start"), "it's starting", "error message %s", "formatted") diff --git a/vendor/github.com/stretchr/testify/assert/assertion_forward.go b/vendor/github.com/stretchr/testify/assert/assertion_forward.go index 9db8894..25337a6 100644 --- a/vendor/github.com/stretchr/testify/assert/assertion_forward.go +++ b/vendor/github.com/stretchr/testify/assert/assertion_forward.go @@ -204,6 +204,42 @@ func (a *Assertions) Error(err error, msgAndArgs ...interface{}) bool { return Error(a.t, err, msgAndArgs...) } +// ErrorAs asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func (a *Assertions) ErrorAs(err error, target interface{}, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return ErrorAs(a.t, err, target, msgAndArgs...) +} + +// ErrorAsf asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func (a *Assertions) ErrorAsf(err error, target interface{}, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return ErrorAsf(a.t, err, target, msg, args...) +} + +// ErrorIs asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) ErrorIs(err error, target error, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return ErrorIs(a.t, err, target, msgAndArgs...) +} + +// ErrorIsf asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) ErrorIsf(err error, target error, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return ErrorIsf(a.t, err, target, msg, args...) +} + // Errorf asserts that a function returned an error (i.e. not `nil`). // // actualObj, err := SomeFunction() @@ -631,6 +667,102 @@ func (a *Assertions) InEpsilonf(expected interface{}, actual interface{}, epsilo return InEpsilonf(a.t, expected, actual, epsilon, msg, args...) } +// IsDecreasing asserts that the collection is decreasing +// +// a.IsDecreasing([]int{2, 1, 0}) +// a.IsDecreasing([]float{2, 1}) +// a.IsDecreasing([]string{"b", "a"}) +func (a *Assertions) IsDecreasing(object interface{}, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsDecreasing(a.t, object, msgAndArgs...) +} + +// IsDecreasingf asserts that the collection is decreasing +// +// a.IsDecreasingf([]int{2, 1, 0}, "error message %s", "formatted") +// a.IsDecreasingf([]float{2, 1}, "error message %s", "formatted") +// a.IsDecreasingf([]string{"b", "a"}, "error message %s", "formatted") +func (a *Assertions) IsDecreasingf(object interface{}, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsDecreasingf(a.t, object, msg, args...) +} + +// IsIncreasing asserts that the collection is increasing +// +// a.IsIncreasing([]int{1, 2, 3}) +// a.IsIncreasing([]float{1, 2}) +// a.IsIncreasing([]string{"a", "b"}) +func (a *Assertions) IsIncreasing(object interface{}, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsIncreasing(a.t, object, msgAndArgs...) +} + +// IsIncreasingf asserts that the collection is increasing +// +// a.IsIncreasingf([]int{1, 2, 3}, "error message %s", "formatted") +// a.IsIncreasingf([]float{1, 2}, "error message %s", "formatted") +// a.IsIncreasingf([]string{"a", "b"}, "error message %s", "formatted") +func (a *Assertions) IsIncreasingf(object interface{}, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsIncreasingf(a.t, object, msg, args...) +} + +// IsNonDecreasing asserts that the collection is not decreasing +// +// a.IsNonDecreasing([]int{1, 1, 2}) +// a.IsNonDecreasing([]float{1, 2}) +// a.IsNonDecreasing([]string{"a", "b"}) +func (a *Assertions) IsNonDecreasing(object interface{}, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsNonDecreasing(a.t, object, msgAndArgs...) +} + +// IsNonDecreasingf asserts that the collection is not decreasing +// +// a.IsNonDecreasingf([]int{1, 1, 2}, "error message %s", "formatted") +// a.IsNonDecreasingf([]float{1, 2}, "error message %s", "formatted") +// a.IsNonDecreasingf([]string{"a", "b"}, "error message %s", "formatted") +func (a *Assertions) IsNonDecreasingf(object interface{}, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsNonDecreasingf(a.t, object, msg, args...) +} + +// IsNonIncreasing asserts that the collection is not increasing +// +// a.IsNonIncreasing([]int{2, 1, 1}) +// a.IsNonIncreasing([]float{2, 1}) +// a.IsNonIncreasing([]string{"b", "a"}) +func (a *Assertions) IsNonIncreasing(object interface{}, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsNonIncreasing(a.t, object, msgAndArgs...) +} + +// IsNonIncreasingf asserts that the collection is not increasing +// +// a.IsNonIncreasingf([]int{2, 1, 1}, "error message %s", "formatted") +// a.IsNonIncreasingf([]float{2, 1}, "error message %s", "formatted") +// a.IsNonIncreasingf([]string{"b", "a"}, "error message %s", "formatted") +func (a *Assertions) IsNonIncreasingf(object interface{}, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return IsNonIncreasingf(a.t, object, msg, args...) +} + // IsType asserts that the specified objects are of the same type. func (a *Assertions) IsType(expectedType interface{}, object interface{}, msgAndArgs ...interface{}) bool { if h, ok := a.t.(tHelper); ok { @@ -739,6 +871,28 @@ func (a *Assertions) Lessf(e1 interface{}, e2 interface{}, msg string, args ...i return Lessf(a.t, e1, e2, msg, args...) } +// Negative asserts that the specified element is negative +// +// a.Negative(-1) +// a.Negative(-1.23) +func (a *Assertions) Negative(e interface{}, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return Negative(a.t, e, msgAndArgs...) +} + +// Negativef asserts that the specified element is negative +// +// a.Negativef(-1, "error message %s", "formatted") +// a.Negativef(-1.23, "error message %s", "formatted") +func (a *Assertions) Negativef(e interface{}, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return Negativef(a.t, e, msg, args...) +} + // Never asserts that the given condition doesn't satisfy in waitFor time, // periodically checking the target function each tick. // @@ -941,6 +1095,24 @@ func (a *Assertions) NotEqualf(expected interface{}, actual interface{}, msg str return NotEqualf(a.t, expected, actual, msg, args...) } +// NotErrorIs asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) NotErrorIs(err error, target error, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return NotErrorIs(a.t, err, target, msgAndArgs...) +} + +// NotErrorIsf asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) NotErrorIsf(err error, target error, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return NotErrorIsf(a.t, err, target, msg, args...) +} + // NotNil asserts that the specified object is not nil. // // a.NotNil(err) @@ -1133,6 +1305,28 @@ func (a *Assertions) Panicsf(f PanicTestFunc, msg string, args ...interface{}) b return Panicsf(a.t, f, msg, args...) } +// Positive asserts that the specified element is positive +// +// a.Positive(1) +// a.Positive(1.23) +func (a *Assertions) Positive(e interface{}, msgAndArgs ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return Positive(a.t, e, msgAndArgs...) +} + +// Positivef asserts that the specified element is positive +// +// a.Positivef(1, "error message %s", "formatted") +// a.Positivef(1.23, "error message %s", "formatted") +func (a *Assertions) Positivef(e interface{}, msg string, args ...interface{}) bool { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + return Positivef(a.t, e, msg, args...) +} + // Regexp asserts that a specified regexp matches a string. // // a.Regexp(regexp.MustCompile("start"), "it's starting") diff --git a/vendor/github.com/stretchr/testify/assert/assertion_order.go b/vendor/github.com/stretchr/testify/assert/assertion_order.go new file mode 100644 index 0000000..1c3b471 --- /dev/null +++ b/vendor/github.com/stretchr/testify/assert/assertion_order.go @@ -0,0 +1,81 @@ +package assert + +import ( + "fmt" + "reflect" +) + +// isOrdered checks that collection contains orderable elements. +func isOrdered(t TestingT, object interface{}, allowedComparesResults []CompareType, failMessage string, msgAndArgs ...interface{}) bool { + objKind := reflect.TypeOf(object).Kind() + if objKind != reflect.Slice && objKind != reflect.Array { + return false + } + + objValue := reflect.ValueOf(object) + objLen := objValue.Len() + + if objLen <= 1 { + return true + } + + value := objValue.Index(0) + valueInterface := value.Interface() + firstValueKind := value.Kind() + + for i := 1; i < objLen; i++ { + prevValue := value + prevValueInterface := valueInterface + + value = objValue.Index(i) + valueInterface = value.Interface() + + compareResult, isComparable := compare(prevValueInterface, valueInterface, firstValueKind) + + if !isComparable { + return Fail(t, fmt.Sprintf("Can not compare type \"%s\" and \"%s\"", reflect.TypeOf(value), reflect.TypeOf(prevValue)), msgAndArgs...) + } + + if !containsValue(allowedComparesResults, compareResult) { + return Fail(t, fmt.Sprintf(failMessage, prevValue, value), msgAndArgs...) + } + } + + return true +} + +// IsIncreasing asserts that the collection is increasing +// +// assert.IsIncreasing(t, []int{1, 2, 3}) +// assert.IsIncreasing(t, []float{1, 2}) +// assert.IsIncreasing(t, []string{"a", "b"}) +func IsIncreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool { + return isOrdered(t, object, []CompareType{compareLess}, "\"%v\" is not less than \"%v\"", msgAndArgs) +} + +// IsNonIncreasing asserts that the collection is not increasing +// +// assert.IsNonIncreasing(t, []int{2, 1, 1}) +// assert.IsNonIncreasing(t, []float{2, 1}) +// assert.IsNonIncreasing(t, []string{"b", "a"}) +func IsNonIncreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool { + return isOrdered(t, object, []CompareType{compareEqual, compareGreater}, "\"%v\" is not greater than or equal to \"%v\"", msgAndArgs) +} + +// IsDecreasing asserts that the collection is decreasing +// +// assert.IsDecreasing(t, []int{2, 1, 0}) +// assert.IsDecreasing(t, []float{2, 1}) +// assert.IsDecreasing(t, []string{"b", "a"}) +func IsDecreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool { + return isOrdered(t, object, []CompareType{compareGreater}, "\"%v\" is not greater than \"%v\"", msgAndArgs) +} + +// IsNonDecreasing asserts that the collection is not decreasing +// +// assert.IsNonDecreasing(t, []int{1, 1, 2}) +// assert.IsNonDecreasing(t, []float{1, 2}) +// assert.IsNonDecreasing(t, []string{"a", "b"}) +func IsNonDecreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool { + return isOrdered(t, object, []CompareType{compareLess, compareEqual}, "\"%v\" is not less than or equal to \"%v\"", msgAndArgs) +} diff --git a/vendor/github.com/stretchr/testify/assert/assertions.go b/vendor/github.com/stretchr/testify/assert/assertions.go index 914a10d..bcac440 100644 --- a/vendor/github.com/stretchr/testify/assert/assertions.go +++ b/vendor/github.com/stretchr/testify/assert/assertions.go @@ -172,8 +172,8 @@ func isTest(name, prefix string) bool { if len(name) == len(prefix) { // "Test" is ok return true } - rune, _ := utf8.DecodeRuneInString(name[len(prefix):]) - return !unicode.IsLower(rune) + r, _ := utf8.DecodeRuneInString(name[len(prefix):]) + return !unicode.IsLower(r) } func messageFromMsgAndArgs(msgAndArgs ...interface{}) string { @@ -1622,6 +1622,7 @@ var spewConfig = spew.ConfigState{ DisableCapacities: true, SortKeys: true, DisableMethods: true, + MaxDepth: 10, } type tHelper interface { @@ -1693,3 +1694,81 @@ func Never(t TestingT, condition func() bool, waitFor time.Duration, tick time.D } } } + +// ErrorIs asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func ErrorIs(t TestingT, err, target error, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if errors.Is(err, target) { + return true + } + + var expectedText string + if target != nil { + expectedText = target.Error() + } + + chain := buildErrorChainString(err) + + return Fail(t, fmt.Sprintf("Target error should be in err chain:\n"+ + "expected: %q\n"+ + "in chain: %s", expectedText, chain, + ), msgAndArgs...) +} + +// NotErrorIs asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func NotErrorIs(t TestingT, err, target error, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if !errors.Is(err, target) { + return true + } + + var expectedText string + if target != nil { + expectedText = target.Error() + } + + chain := buildErrorChainString(err) + + return Fail(t, fmt.Sprintf("Target error should not be in err chain:\n"+ + "found: %q\n"+ + "in chain: %s", expectedText, chain, + ), msgAndArgs...) +} + +// ErrorAs asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func ErrorAs(t TestingT, err error, target interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if errors.As(err, target) { + return true + } + + chain := buildErrorChainString(err) + + return Fail(t, fmt.Sprintf("Should be in error chain:\n"+ + "expected: %q\n"+ + "in chain: %s", target, chain, + ), msgAndArgs...) +} + +func buildErrorChainString(err error) string { + if err == nil { + return "" + } + + e := errors.Unwrap(err) + chain := fmt.Sprintf("%q", err.Error()) + for e != nil { + chain += fmt.Sprintf("\n\t%q", e.Error()) + e = errors.Unwrap(e) + } + return chain +} diff --git a/vendor/github.com/stretchr/testify/require/require.go b/vendor/github.com/stretchr/testify/require/require.go index ec4624b..51820df 100644 --- a/vendor/github.com/stretchr/testify/require/require.go +++ b/vendor/github.com/stretchr/testify/require/require.go @@ -256,6 +256,54 @@ func Error(t TestingT, err error, msgAndArgs ...interface{}) { t.FailNow() } +// ErrorAs asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func ErrorAs(t TestingT, err error, target interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ErrorAs(t, err, target, msgAndArgs...) { + return + } + t.FailNow() +} + +// ErrorAsf asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func ErrorAsf(t TestingT, err error, target interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ErrorAsf(t, err, target, msg, args...) { + return + } + t.FailNow() +} + +// ErrorIs asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func ErrorIs(t TestingT, err error, target error, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ErrorIs(t, err, target, msgAndArgs...) { + return + } + t.FailNow() +} + +// ErrorIsf asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func ErrorIsf(t TestingT, err error, target error, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ErrorIsf(t, err, target, msg, args...) { + return + } + t.FailNow() +} + // Errorf asserts that a function returned an error (i.e. not `nil`). // // actualObj, err := SomeFunction() @@ -806,6 +854,126 @@ func InEpsilonf(t TestingT, expected interface{}, actual interface{}, epsilon fl t.FailNow() } +// IsDecreasing asserts that the collection is decreasing +// +// assert.IsDecreasing(t, []int{2, 1, 0}) +// assert.IsDecreasing(t, []float{2, 1}) +// assert.IsDecreasing(t, []string{"b", "a"}) +func IsDecreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsDecreasing(t, object, msgAndArgs...) { + return + } + t.FailNow() +} + +// IsDecreasingf asserts that the collection is decreasing +// +// assert.IsDecreasingf(t, []int{2, 1, 0}, "error message %s", "formatted") +// assert.IsDecreasingf(t, []float{2, 1}, "error message %s", "formatted") +// assert.IsDecreasingf(t, []string{"b", "a"}, "error message %s", "formatted") +func IsDecreasingf(t TestingT, object interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsDecreasingf(t, object, msg, args...) { + return + } + t.FailNow() +} + +// IsIncreasing asserts that the collection is increasing +// +// assert.IsIncreasing(t, []int{1, 2, 3}) +// assert.IsIncreasing(t, []float{1, 2}) +// assert.IsIncreasing(t, []string{"a", "b"}) +func IsIncreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsIncreasing(t, object, msgAndArgs...) { + return + } + t.FailNow() +} + +// IsIncreasingf asserts that the collection is increasing +// +// assert.IsIncreasingf(t, []int{1, 2, 3}, "error message %s", "formatted") +// assert.IsIncreasingf(t, []float{1, 2}, "error message %s", "formatted") +// assert.IsIncreasingf(t, []string{"a", "b"}, "error message %s", "formatted") +func IsIncreasingf(t TestingT, object interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsIncreasingf(t, object, msg, args...) { + return + } + t.FailNow() +} + +// IsNonDecreasing asserts that the collection is not decreasing +// +// assert.IsNonDecreasing(t, []int{1, 1, 2}) +// assert.IsNonDecreasing(t, []float{1, 2}) +// assert.IsNonDecreasing(t, []string{"a", "b"}) +func IsNonDecreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsNonDecreasing(t, object, msgAndArgs...) { + return + } + t.FailNow() +} + +// IsNonDecreasingf asserts that the collection is not decreasing +// +// assert.IsNonDecreasingf(t, []int{1, 1, 2}, "error message %s", "formatted") +// assert.IsNonDecreasingf(t, []float{1, 2}, "error message %s", "formatted") +// assert.IsNonDecreasingf(t, []string{"a", "b"}, "error message %s", "formatted") +func IsNonDecreasingf(t TestingT, object interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsNonDecreasingf(t, object, msg, args...) { + return + } + t.FailNow() +} + +// IsNonIncreasing asserts that the collection is not increasing +// +// assert.IsNonIncreasing(t, []int{2, 1, 1}) +// assert.IsNonIncreasing(t, []float{2, 1}) +// assert.IsNonIncreasing(t, []string{"b", "a"}) +func IsNonIncreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsNonIncreasing(t, object, msgAndArgs...) { + return + } + t.FailNow() +} + +// IsNonIncreasingf asserts that the collection is not increasing +// +// assert.IsNonIncreasingf(t, []int{2, 1, 1}, "error message %s", "formatted") +// assert.IsNonIncreasingf(t, []float{2, 1}, "error message %s", "formatted") +// assert.IsNonIncreasingf(t, []string{"b", "a"}, "error message %s", "formatted") +func IsNonIncreasingf(t TestingT, object interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsNonIncreasingf(t, object, msg, args...) { + return + } + t.FailNow() +} + // IsType asserts that the specified objects are of the same type. func IsType(t TestingT, expectedType interface{}, object interface{}, msgAndArgs ...interface{}) { if h, ok := t.(tHelper); ok { @@ -944,6 +1112,34 @@ func Lessf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...inter t.FailNow() } +// Negative asserts that the specified element is negative +// +// assert.Negative(t, -1) +// assert.Negative(t, -1.23) +func Negative(t TestingT, e interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Negative(t, e, msgAndArgs...) { + return + } + t.FailNow() +} + +// Negativef asserts that the specified element is negative +// +// assert.Negativef(t, -1, "error message %s", "formatted") +// assert.Negativef(t, -1.23, "error message %s", "formatted") +func Negativef(t TestingT, e interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Negativef(t, e, msg, args...) { + return + } + t.FailNow() +} + // Never asserts that the given condition doesn't satisfy in waitFor time, // periodically checking the target function each tick. // @@ -1200,6 +1396,30 @@ func NotEqualf(t TestingT, expected interface{}, actual interface{}, msg string, t.FailNow() } +// NotErrorIs asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func NotErrorIs(t TestingT, err error, target error, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotErrorIs(t, err, target, msgAndArgs...) { + return + } + t.FailNow() +} + +// NotErrorIsf asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func NotErrorIsf(t TestingT, err error, target error, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotErrorIsf(t, err, target, msg, args...) { + return + } + t.FailNow() +} + // NotNil asserts that the specified object is not nil. // // assert.NotNil(t, err) @@ -1446,6 +1666,34 @@ func Panicsf(t TestingT, f assert.PanicTestFunc, msg string, args ...interface{} t.FailNow() } +// Positive asserts that the specified element is positive +// +// assert.Positive(t, 1) +// assert.Positive(t, 1.23) +func Positive(t TestingT, e interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Positive(t, e, msgAndArgs...) { + return + } + t.FailNow() +} + +// Positivef asserts that the specified element is positive +// +// assert.Positivef(t, 1, "error message %s", "formatted") +// assert.Positivef(t, 1.23, "error message %s", "formatted") +func Positivef(t TestingT, e interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Positivef(t, e, msg, args...) { + return + } + t.FailNow() +} + // Regexp asserts that a specified regexp matches a string. // // assert.Regexp(t, regexp.MustCompile("start"), "it's starting") diff --git a/vendor/github.com/stretchr/testify/require/require_forward.go b/vendor/github.com/stretchr/testify/require/require_forward.go index 103d7dc..ed54a9d 100644 --- a/vendor/github.com/stretchr/testify/require/require_forward.go +++ b/vendor/github.com/stretchr/testify/require/require_forward.go @@ -205,6 +205,42 @@ func (a *Assertions) Error(err error, msgAndArgs ...interface{}) { Error(a.t, err, msgAndArgs...) } +// ErrorAs asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func (a *Assertions) ErrorAs(err error, target interface{}, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + ErrorAs(a.t, err, target, msgAndArgs...) +} + +// ErrorAsf asserts that at least one of the errors in err's chain matches target, and if so, sets target to that error value. +// This is a wrapper for errors.As. +func (a *Assertions) ErrorAsf(err error, target interface{}, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + ErrorAsf(a.t, err, target, msg, args...) +} + +// ErrorIs asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) ErrorIs(err error, target error, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + ErrorIs(a.t, err, target, msgAndArgs...) +} + +// ErrorIsf asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) ErrorIsf(err error, target error, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + ErrorIsf(a.t, err, target, msg, args...) +} + // Errorf asserts that a function returned an error (i.e. not `nil`). // // actualObj, err := SomeFunction() @@ -632,6 +668,102 @@ func (a *Assertions) InEpsilonf(expected interface{}, actual interface{}, epsilo InEpsilonf(a.t, expected, actual, epsilon, msg, args...) } +// IsDecreasing asserts that the collection is decreasing +// +// a.IsDecreasing([]int{2, 1, 0}) +// a.IsDecreasing([]float{2, 1}) +// a.IsDecreasing([]string{"b", "a"}) +func (a *Assertions) IsDecreasing(object interface{}, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsDecreasing(a.t, object, msgAndArgs...) +} + +// IsDecreasingf asserts that the collection is decreasing +// +// a.IsDecreasingf([]int{2, 1, 0}, "error message %s", "formatted") +// a.IsDecreasingf([]float{2, 1}, "error message %s", "formatted") +// a.IsDecreasingf([]string{"b", "a"}, "error message %s", "formatted") +func (a *Assertions) IsDecreasingf(object interface{}, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsDecreasingf(a.t, object, msg, args...) +} + +// IsIncreasing asserts that the collection is increasing +// +// a.IsIncreasing([]int{1, 2, 3}) +// a.IsIncreasing([]float{1, 2}) +// a.IsIncreasing([]string{"a", "b"}) +func (a *Assertions) IsIncreasing(object interface{}, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsIncreasing(a.t, object, msgAndArgs...) +} + +// IsIncreasingf asserts that the collection is increasing +// +// a.IsIncreasingf([]int{1, 2, 3}, "error message %s", "formatted") +// a.IsIncreasingf([]float{1, 2}, "error message %s", "formatted") +// a.IsIncreasingf([]string{"a", "b"}, "error message %s", "formatted") +func (a *Assertions) IsIncreasingf(object interface{}, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsIncreasingf(a.t, object, msg, args...) +} + +// IsNonDecreasing asserts that the collection is not decreasing +// +// a.IsNonDecreasing([]int{1, 1, 2}) +// a.IsNonDecreasing([]float{1, 2}) +// a.IsNonDecreasing([]string{"a", "b"}) +func (a *Assertions) IsNonDecreasing(object interface{}, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsNonDecreasing(a.t, object, msgAndArgs...) +} + +// IsNonDecreasingf asserts that the collection is not decreasing +// +// a.IsNonDecreasingf([]int{1, 1, 2}, "error message %s", "formatted") +// a.IsNonDecreasingf([]float{1, 2}, "error message %s", "formatted") +// a.IsNonDecreasingf([]string{"a", "b"}, "error message %s", "formatted") +func (a *Assertions) IsNonDecreasingf(object interface{}, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsNonDecreasingf(a.t, object, msg, args...) +} + +// IsNonIncreasing asserts that the collection is not increasing +// +// a.IsNonIncreasing([]int{2, 1, 1}) +// a.IsNonIncreasing([]float{2, 1}) +// a.IsNonIncreasing([]string{"b", "a"}) +func (a *Assertions) IsNonIncreasing(object interface{}, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsNonIncreasing(a.t, object, msgAndArgs...) +} + +// IsNonIncreasingf asserts that the collection is not increasing +// +// a.IsNonIncreasingf([]int{2, 1, 1}, "error message %s", "formatted") +// a.IsNonIncreasingf([]float{2, 1}, "error message %s", "formatted") +// a.IsNonIncreasingf([]string{"b", "a"}, "error message %s", "formatted") +func (a *Assertions) IsNonIncreasingf(object interface{}, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + IsNonIncreasingf(a.t, object, msg, args...) +} + // IsType asserts that the specified objects are of the same type. func (a *Assertions) IsType(expectedType interface{}, object interface{}, msgAndArgs ...interface{}) { if h, ok := a.t.(tHelper); ok { @@ -740,6 +872,28 @@ func (a *Assertions) Lessf(e1 interface{}, e2 interface{}, msg string, args ...i Lessf(a.t, e1, e2, msg, args...) } +// Negative asserts that the specified element is negative +// +// a.Negative(-1) +// a.Negative(-1.23) +func (a *Assertions) Negative(e interface{}, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + Negative(a.t, e, msgAndArgs...) +} + +// Negativef asserts that the specified element is negative +// +// a.Negativef(-1, "error message %s", "formatted") +// a.Negativef(-1.23, "error message %s", "formatted") +func (a *Assertions) Negativef(e interface{}, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + Negativef(a.t, e, msg, args...) +} + // Never asserts that the given condition doesn't satisfy in waitFor time, // periodically checking the target function each tick. // @@ -942,6 +1096,24 @@ func (a *Assertions) NotEqualf(expected interface{}, actual interface{}, msg str NotEqualf(a.t, expected, actual, msg, args...) } +// NotErrorIs asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) NotErrorIs(err error, target error, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + NotErrorIs(a.t, err, target, msgAndArgs...) +} + +// NotErrorIsf asserts that at none of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func (a *Assertions) NotErrorIsf(err error, target error, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + NotErrorIsf(a.t, err, target, msg, args...) +} + // NotNil asserts that the specified object is not nil. // // a.NotNil(err) @@ -1134,6 +1306,28 @@ func (a *Assertions) Panicsf(f assert.PanicTestFunc, msg string, args ...interfa Panicsf(a.t, f, msg, args...) } +// Positive asserts that the specified element is positive +// +// a.Positive(1) +// a.Positive(1.23) +func (a *Assertions) Positive(e interface{}, msgAndArgs ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + Positive(a.t, e, msgAndArgs...) +} + +// Positivef asserts that the specified element is positive +// +// a.Positivef(1, "error message %s", "formatted") +// a.Positivef(1.23, "error message %s", "formatted") +func (a *Assertions) Positivef(e interface{}, msg string, args ...interface{}) { + if h, ok := a.t.(tHelper); ok { + h.Helper() + } + Positivef(a.t, e, msg, args...) +} + // Regexp asserts that a specified regexp matches a string. // // a.Regexp(regexp.MustCompile("start"), "it's starting") diff --git a/vendor/github.com/ulikunitz/xz/LICENSE b/vendor/github.com/ulikunitz/xz/LICENSE index d321499..009b848 100644 --- a/vendor/github.com/ulikunitz/xz/LICENSE +++ b/vendor/github.com/ulikunitz/xz/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2014-2020 Ulrich Kunitz +Copyright (c) 2014-2021 Ulrich Kunitz All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/vendor/github.com/ulikunitz/xz/SECURITY.md b/vendor/github.com/ulikunitz/xz/SECURITY.md new file mode 100644 index 0000000..5f7ec01 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/SECURITY.md @@ -0,0 +1,10 @@ +# Security Policy + +## Supported Versions + +Currently the last minor version v0.5.x is supported. + +## Reporting a Vulnerability + +Report a vulnerability by creating a Github issue at +. Expect a response in a week. diff --git a/vendor/github.com/ulikunitz/xz/TODO.md b/vendor/github.com/ulikunitz/xz/TODO.md index a4224ce..594e0c7 100644 --- a/vendor/github.com/ulikunitz/xz/TODO.md +++ b/vendor/github.com/ulikunitz/xz/TODO.md @@ -8,19 +8,17 @@ 1. Review encoder and check for lzma improvements under xz. 2. Fix binary tree matcher. -3. Compare compression ratio with xz tool using comparable parameters - and optimize parameters -4. Do some optimizations - - rename operation action and make it a simple type of size 8 - - make maxMatches, wordSize parameters - - stop searching after a certain length is found (parameter sweetLen) +3. Compare compression ratio with xz tool using comparable parameters and optimize parameters +4. rename operation action and make it a simple type of size 8 +5. make maxMatches, wordSize parameters +6. stop searching after a certain length is found (parameter sweetLen) ## Release v0.7 1. Optimize code 2. Do statistical analysis to get linear presets. 3. Test sync.Pool compatability for xz and lzma Writer and Reader -3. Fuzz optimized code. +4. Fuzz optimized code. ## Release v0.8 @@ -44,52 +42,85 @@ ## Package lzma -### Release v0.6 - -- Rewrite Encoder into a simple greedy one-op-at-a-time encoder - including - + simple scan at the dictionary head for the same byte - + use the killer byte (requiring matches to get longer, the first - test should be the byte that would make the match longer) +### v0.6 +* Rewrite Encoder into a simple greedy one-op-at-a-time encoder including + * simple scan at the dictionary head for the same byte + * use the killer byte (requiring matches to get longer, the first test should be the byte that would make the match longer) ## Optimizations -- There may be a lot of false sharing in lzma.State; check whether this - can be improved by reorganizing the internal structure of it. -- Check whether batching encoding and decoding improves speed. +* There may be a lot of false sharing in lzma. State; check whether this can be improved by reorganizing the internal structure of it. + +* Check whether batching encoding and decoding improves speed. ### DAG optimizations -- Use full buffer to create minimal bit-length above range encoder. -- Might be too slow (see v0.4) +* Use full buffer to create minimal bit-length above range encoder. +* Might be too slow (see v0.4) ### Different match finders -- hashes with 2, 3 characters additional to 4 characters -- binary trees with 2-7 characters (uint64 as key, use uint32 as +* hashes with 2, 3 characters additional to 4 characters +* binary trees with 2-7 characters (uint64 as key, use uint32 as + pointers into a an array) -- rb-trees with 2-7 characters (uint64 as key, use uint32 as pointers + +* rb-trees with 2-7 characters (uint64 as key, use uint32 as pointers + into an array with bit-steeling for the colors) ## Release Procedure -- execute goch -l for all packages; probably with lower param like 0.5. -- check orthography with gospell -- Write release notes in doc/relnotes. -- Update README.md -- xb copyright . in xz directory to ensure all new files have Copyright - header -- VERSION= go generate github.com/ulikunitz/xz/... to update - version files -- Execute test for Linux/amd64, Linux/x86 and Windows/amd64. -- Update TODO.md - write short log entry -- git checkout master && git merge dev -- git tag -a -- git push +* execute goch -l for all packages; probably with lower param like 0.5. +* check orthography with gospell +* Write release notes in doc/relnotes. +* Update README.md +* xb copyright . in xz directory to ensure all new files have Copyright header +* `VERSION= go generate github.com/ulikunitz/xz/...` to update version files +* Execute test for Linux/amd64, Linux/x86 and Windows/amd64. +* Update TODO.md - write short log entry +* `git checkout master && git merge dev` +* `git tag -a ` +* `git push` ## Log +### 2021-02-02 + +Mituo Heijo has fuzzed xz and found a bug in the function readIndexBody. The +function allocated a slice of records immediately after reading the value +without further checks. Since the number has been too large the make function +did panic. The fix is to check the number against the expected number of records +before allocating the records. + +### 2020-12-17 + +Release v0.5.9 fixes warnings, a typo and adds SECURITY.md. + +One fix is interesting. + +```go +const ( + a byte = 0x1 + b = 0x2 +) +``` + +The constants a and b don't have the same type. Correct is + +```go +const ( + a byte = 0x1 + b byte = 0x2 +) +``` + +### 2020-08-19 + +Release v0.5.8 fixes issue +[issue #35](https://github.com/ulikunitz/xz/issues/35). + ### 2020-02-24 Release v0.5.7 supports the check-ID None and fixes @@ -203,8 +234,8 @@ MININT. ### 2015-06-04 -It has been a productive day. I improved the interface of lzma.Reader -and lzma.Writer and fixed the error handling. +It has been a productive day. I improved the interface of lzma. Reader +and lzma. Writer and fixed the error handling. ### 2015-06-01 @@ -255,7 +286,7 @@ needed anymore. However I will implement a ReaderState and WriterState type to use static typing to ensure the right State object is combined with the -right lzbase.Reader and lzbase.Writer. +right lzbase. Reader and lzbase. Writer. As a start I have implemented ReaderState and WriterState to ensure that the state for reading is only used by readers and WriterState only @@ -277,11 +308,11 @@ old lzma package has been completely removed. ### 2015-04-05 -Implemented lzma.Reader and tested it. +Implemented lzma. Reader and tested it. ### 2015-04-04 -Implemented baseReader by adapting code form lzma.Reader. +Implemented baseReader by adapting code form lzma. Reader. ### 2015-04-03 @@ -297,7 +328,7 @@ However in Francesco Campoy's presentation "Go for Javaneros (Javaïstes?)" is the the idea that using an embedded field E, all the methods of E will be defined on T. If E is an interface T satisfies E. -https://talks.golang.org/2014/go4java.slide#51 + I have never used this, but it seems to be a cool idea. @@ -322,11 +353,11 @@ and the opCodec. 1. Implemented simple lzmago tool 2. Tested tool against large 4.4G file - - compression worked correctly; tested decompression with lzma - - decompression hits a full buffer condition + * compression worked correctly; tested decompression with lzma + * decompression hits a full buffer condition 3. Fixed a bug in the compressor and wrote a test for it 4. Executed full cycle for 4.4 GB file; performance can be improved ;-) ### 2015-01-11 -- Release v0.2 because of the working LZMA encoder and decoder +* Release v0.2 because of the working LZMA encoder and decoder diff --git a/vendor/github.com/ulikunitz/xz/bits.go b/vendor/github.com/ulikunitz/xz/bits.go index 364213d..e48450c 100644 --- a/vendor/github.com/ulikunitz/xz/bits.go +++ b/vendor/github.com/ulikunitz/xz/bits.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -54,6 +54,8 @@ var errOverflowU64 = errors.New("xz: uvarint overflows 64-bit unsigned integer") // readUvarint reads a uvarint from the given byte reader. func readUvarint(r io.ByteReader) (x uint64, n int, err error) { + const maxUvarintLen = 10 + var s uint i := 0 for { @@ -62,8 +64,11 @@ func readUvarint(r io.ByteReader) (x uint64, n int, err error) { return x, i, err } i++ + if i > maxUvarintLen { + return x, i, errOverflowU64 + } if b < 0x80 { - if i > 10 || i == 10 && b > 1 { + if i == maxUvarintLen && b > 1 { return x, i, errOverflowU64 } return x | uint64(b)<>27]) } +*/ // nlz32 computes the number of leading zeros for an unsigned 32-bit integer. func nlz32(x uint32) int { diff --git a/vendor/github.com/ulikunitz/xz/lzma/breader.go b/vendor/github.com/ulikunitz/xz/lzma/breader.go index 4ad09a1..939be88 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/breader.go +++ b/vendor/github.com/ulikunitz/xz/lzma/breader.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/buffer.go b/vendor/github.com/ulikunitz/xz/lzma/buffer.go index 9cb7838..2761de5 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/buffer.go +++ b/vendor/github.com/ulikunitz/xz/lzma/buffer.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go b/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go index 290606d..040874c 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go +++ b/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/decoder.go b/vendor/github.com/ulikunitz/xz/lzma/decoder.go index e5a760a..cbb943a 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/decoder.go +++ b/vendor/github.com/ulikunitz/xz/lzma/decoder.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -200,7 +200,7 @@ func (d *decoder) decompress() error { op, err := d.readOp() switch err { case nil: - break + // break case errEOS: d.eos = true if !d.rd.possiblyAtEnd() { diff --git a/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go b/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go index ba06712..8cd616e 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go +++ b/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -126,10 +126,3 @@ func (d *decoderDict) Available() int { return d.buf.Available() } // Read reads data from the buffer contained in the decoder dictionary. func (d *decoderDict) Read(p []byte) (n int, err error) { return d.buf.Read(p) } - -// Buffered returns the number of bytes currently buffered in the -// decoder dictionary. -func (d *decoderDict) buffered() int { return d.buf.Buffered() } - -// Peek gets data from the buffer without advancing the rear index. -func (d *decoderDict) peek(p []byte) (n int, err error) { return d.buf.Peek(p) } diff --git a/vendor/github.com/ulikunitz/xz/lzma/directcodec.go b/vendor/github.com/ulikunitz/xz/lzma/directcodec.go index e6e0c6d..20b256a 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/directcodec.go +++ b/vendor/github.com/ulikunitz/xz/lzma/directcodec.go @@ -1,24 +1,13 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lzma -import "fmt" - // directCodec allows the encoding and decoding of values with a fixed number // of bits. The number of bits must be in the range [1,32]. type directCodec byte -// makeDirectCodec creates a directCodec. The function panics if the number of -// bits is not in the range [1,32]. -func makeDirectCodec(bits int) directCodec { - if !(1 <= bits && bits <= 32) { - panic(fmt.Errorf("bits=%d out of range", bits)) - } - return directCodec(bits) -} - // Bits returns the number of bits supported by this codec. func (dc directCodec) Bits() int { return int(dc) diff --git a/vendor/github.com/ulikunitz/xz/lzma/distcodec.go b/vendor/github.com/ulikunitz/xz/lzma/distcodec.go index 69871c0..60ed9ae 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/distcodec.go +++ b/vendor/github.com/ulikunitz/xz/lzma/distcodec.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -20,8 +20,6 @@ const ( posSlotBits = 6 // number of align bits alignBits = 4 - // maximum position slot - maxPosSlot = 63 ) // distCodec provides encoding and decoding of distance values. @@ -45,20 +43,6 @@ func (dc *distCodec) deepcopy(src *distCodec) { dc.alignCodec.deepcopy(&src.alignCodec) } -// distBits returns the number of bits required to encode dist. -func distBits(dist uint32) int { - if dist < startPosModel { - return 6 - } - // slot s > 3, dist d - // s = 2(bits(d)-1) + bit(d, bits(d)-2) - // s>>1 = bits(d)-1 - // bits(d) = 32-nlz32(d) - // s>>1=31-nlz32(d) - // n = 5 + (s>>1) = 36 - nlz32(d) - return 36 - nlz32(dist) -} - // newDistCodec creates a new distance codec. func (dc *distCodec) init() { for i := range dc.posSlotCodecs { diff --git a/vendor/github.com/ulikunitz/xz/lzma/encoder.go b/vendor/github.com/ulikunitz/xz/lzma/encoder.go index 59055eb..5ed057a 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/encoder.go +++ b/vendor/github.com/ulikunitz/xz/lzma/encoder.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go b/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go index 40f3d3f..056f897 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go +++ b/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -19,7 +19,7 @@ type matcher interface { } // encoderDict provides the dictionary of the encoder. It includes an -// addtional buffer atop of the actual dictionary. +// additional buffer atop of the actual dictionary. type encoderDict struct { buf buffer m matcher diff --git a/vendor/github.com/ulikunitz/xz/lzma/hashtable.go b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go index e82970e..0fb7910 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/hashtable.go +++ b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/header.go b/vendor/github.com/ulikunitz/xz/lzma/header.go index cda3946..04276c8 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/header.go +++ b/vendor/github.com/ulikunitz/xz/lzma/header.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/header2.go b/vendor/github.com/ulikunitz/xz/lzma/header2.go index cd14881..be54dd8 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/header2.go +++ b/vendor/github.com/ulikunitz/xz/lzma/header2.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -264,7 +264,7 @@ type chunkState byte // state const ( start chunkState = 'S' - stop = 'T' + stop chunkState = 'T' ) // errors for the chunk state handling diff --git a/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go b/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go index 927395b..6e0edfc 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go +++ b/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -56,19 +56,6 @@ func (lc *lengthCodec) init() { lc.high = makeTreeCodec(8) } -// lBits gives the number of bits used for the encoding of the l value -// provided to the range encoder. -func lBits(l uint32) int { - switch { - case l < 8: - return 4 - case l < 16: - return 5 - default: - return 10 - } -} - // Encode encodes the length offset. The length offset l can be compute by // subtracting minMatchLen (2) from the actual length. // diff --git a/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go b/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go index ca31530..0bfc763 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go +++ b/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -123,10 +123,3 @@ const ( minLP = 0 maxLP = 4 ) - -// minState and maxState define a range for the state values stored in -// the State values. -const ( - minState = 0 - maxState = 11 -) diff --git a/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go b/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go index 7d03ec0..96ebda0 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go +++ b/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/operation.go b/vendor/github.com/ulikunitz/xz/lzma/operation.go index a75c9b4..026ce48 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/operation.go +++ b/vendor/github.com/ulikunitz/xz/lzma/operation.go @@ -1,11 +1,10 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lzma import ( - "errors" "fmt" "unicode" ) @@ -24,30 +23,6 @@ type match struct { n int } -// verify checks whether the match is valid. If that is not the case an -// error is returned. -func (m match) verify() error { - if !(minDistance <= m.distance && m.distance <= maxDistance) { - return errors.New("distance out of range") - } - if !(1 <= m.n && m.n <= maxMatchLen) { - return errors.New("length out of range") - } - return nil -} - -// l return the l-value for the match, which is the difference of length -// n and 2. -func (m match) l() uint32 { - return uint32(m.n - minMatchLen) -} - -// dist returns the dist value for the match, which is one less of the -// distance stored in the match. -func (m match) dist() uint32 { - return uint32(m.distance - minDistance) -} - // Len returns the number of bytes matched. func (m match) Len() int { return m.n diff --git a/vendor/github.com/ulikunitz/xz/lzma/prob.go b/vendor/github.com/ulikunitz/xz/lzma/prob.go index 6987a16..9a2648e 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/prob.go +++ b/vendor/github.com/ulikunitz/xz/lzma/prob.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/properties.go b/vendor/github.com/ulikunitz/xz/lzma/properties.go index 662feba..f229fc9 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/properties.go +++ b/vendor/github.com/ulikunitz/xz/lzma/properties.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go b/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go index 7189a03..57f1ab9 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go +++ b/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -131,32 +131,6 @@ type rangeDecoder struct { code uint32 } -// init initializes the range decoder, by reading from the byte reader. -func (d *rangeDecoder) init() error { - d.nrange = 0xffffffff - d.code = 0 - - b, err := d.br.ReadByte() - if err != nil { - return err - } - if b != 0 { - return errors.New("newRangeDecoder: first byte not zero") - } - - for i := 0; i < 4; i++ { - if err = d.updateCode(); err != nil { - return err - } - } - - if d.code >= d.nrange { - return errors.New("newRangeDecoder: d.code >= d.nrange") - } - - return nil -} - // newRangeDecoder initializes a range decoder. It reads five bytes from the // reader and therefore may return an error. func newRangeDecoder(br io.ByteReader) (d *rangeDecoder, err error) { diff --git a/vendor/github.com/ulikunitz/xz/lzma/reader.go b/vendor/github.com/ulikunitz/xz/lzma/reader.go index 7b7eef3..2ed13c8 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/reader.go +++ b/vendor/github.com/ulikunitz/xz/lzma/reader.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/reader2.go b/vendor/github.com/ulikunitz/xz/lzma/reader2.go index 33074e6..de3da37 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/reader2.go +++ b/vendor/github.com/ulikunitz/xz/lzma/reader2.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -48,7 +48,6 @@ type Reader2 struct { chunkReader io.Reader cstate chunkState - ctype chunkType } // NewReader2 creates a reader for an LZMA2 chunk sequence. diff --git a/vendor/github.com/ulikunitz/xz/lzma/state.go b/vendor/github.com/ulikunitz/xz/lzma/state.go index 03f061c..09d62f7 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/state.go +++ b/vendor/github.com/ulikunitz/xz/lzma/state.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -53,12 +53,6 @@ func (s *state) Reset() { s.distCodec.init() } -// initState initializes the state. -func initState(s *state, p Properties) { - *s = state{Properties: p} - s.Reset() -} - // newState creates a new state from the give Properties. func newState(p Properties) *state { s := &state{Properties: p} diff --git a/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go b/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go index 1cb3596..6e927e9 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go +++ b/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/writer.go b/vendor/github.com/ulikunitz/xz/lzma/writer.go index 5803ecc..d0d220f 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/writer.go +++ b/vendor/github.com/ulikunitz/xz/lzma/writer.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzma/writer2.go b/vendor/github.com/ulikunitz/xz/lzma/writer2.go index c263b06..dfaaec9 100644 --- a/vendor/github.com/ulikunitz/xz/lzma/writer2.go +++ b/vendor/github.com/ulikunitz/xz/lzma/writer2.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/lzmafilter.go b/vendor/github.com/ulikunitz/xz/lzmafilter.go index 6f4aa2c..4f1bb33 100644 --- a/vendor/github.com/ulikunitz/xz/lzmafilter.go +++ b/vendor/github.com/ulikunitz/xz/lzmafilter.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/none-check.go b/vendor/github.com/ulikunitz/xz/none-check.go index e12d8e4..9524013 100644 --- a/vendor/github.com/ulikunitz/xz/none-check.go +++ b/vendor/github.com/ulikunitz/xz/none-check.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/ulikunitz/xz/reader.go b/vendor/github.com/ulikunitz/xz/reader.go index 22cd6d5..7f974ff 100644 --- a/vendor/github.com/ulikunitz/xz/reader.go +++ b/vendor/github.com/ulikunitz/xz/reader.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -26,13 +26,6 @@ type ReaderConfig struct { SingleStream bool } -// fill replaces all zero values with their default values. -func (c *ReaderConfig) fill() { - if c.DictCap == 0 { - c.DictCap = 8 * 1024 * 1024 - } -} - // Verify checks the reader parameters for Validity. Zero values will be // replaced by default values. func (c *ReaderConfig) Verify() error { @@ -165,22 +158,16 @@ func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) return r, nil } -// errIndex indicates an error with the xz file index. -var errIndex = errors.New("xz: error in xz file index") - // readTail reads the index body and the xz footer. func (r *streamReader) readTail() error { - index, n, err := readIndexBody(r.xz) + index, n, err := readIndexBody(r.xz, len(r.index)) if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } return err } - if len(index) != len(r.index) { - return fmt.Errorf("xz: index length is %d; want %d", - len(index), len(r.index)) - } + for i, rec := range r.index { if rec != index[i] { return fmt.Errorf("xz: record %d is %v; want %v", @@ -265,7 +252,6 @@ type blockReader struct { n int64 hash hash.Hash r io.Reader - err error } // newBlockReader creates a new block reader. @@ -315,10 +301,6 @@ func (br *blockReader) record() record { return record{br.unpaddedSize(), br.uncompressedSize()} } -// errBlockSize indicates that the size of the block in the block header -// is wrong. -var errBlockSize = errors.New("xz: wrong uncompressed size for block") - // Read reads data from the block. func (br *blockReader) Read(p []byte) (n int, err error) { n, err = br.r.Read(p) diff --git a/vendor/github.com/ulikunitz/xz/writer.go b/vendor/github.com/ulikunitz/xz/writer.go index aec10df..6b3a666 100644 --- a/vendor/github.com/ulikunitz/xz/writer.go +++ b/vendor/github.com/ulikunitz/xz/writer.go @@ -1,4 +1,4 @@ -// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -6,6 +6,7 @@ package xz import ( "errors" + "fmt" "hash" "io" @@ -190,6 +191,9 @@ func (c WriterConfig) NewWriter(xz io.Writer) (w *Writer, err error) { return nil, err } data, err := w.h.MarshalBinary() + if err != nil { + return nil, fmt.Errorf("w.h.MarshalBinary(): error %w", err) + } if _, err = xz.Write(data); err != nil { return nil, err } diff --git a/vendor/gopkg.in/yaml.v3/.travis.yml b/vendor/gopkg.in/yaml.v3/.travis.yml deleted file mode 100644 index a130fe8..0000000 --- a/vendor/gopkg.in/yaml.v3/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -language: go - -go: - - "1.4.x" - - "1.5.x" - - "1.6.x" - - "1.7.x" - - "1.8.x" - - "1.9.x" - - "1.10.x" - - "1.11.x" - - "1.12.x" - - "1.13.x" - - "1.14.x" - - "tip" - -go_import_path: gopkg.in/yaml.v3 diff --git a/vendor/gopkg.in/yaml.v3/decode.go b/vendor/gopkg.in/yaml.v3/decode.go index 21c0dac..df36e3a 100644 --- a/vendor/gopkg.in/yaml.v3/decode.go +++ b/vendor/gopkg.in/yaml.v3/decode.go @@ -399,7 +399,7 @@ func (d *decoder) callObsoleteUnmarshaler(n *Node, u obsoleteUnmarshaler) (good // // If n holds a null value, prepare returns before doing anything. func (d *decoder) prepare(n *Node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) { - if n.ShortTag() == nullTag || n.Kind == 0 && n.IsZero() { + if n.ShortTag() == nullTag { return out, false, false } again := true @@ -808,8 +808,10 @@ func (d *decoder) mapping(n *Node, out reflect.Value) (good bool) { } } + mapIsNew := false if out.IsNil() { out.Set(reflect.MakeMap(outt)) + mapIsNew = true } for i := 0; i < l; i += 2 { if isMerge(n.Content[i]) { @@ -826,7 +828,7 @@ func (d *decoder) mapping(n *Node, out reflect.Value) (good bool) { failf("invalid map key: %#v", k.Interface()) } e := reflect.New(et).Elem() - if d.unmarshal(n.Content[i+1], e) { + if d.unmarshal(n.Content[i+1], e) || n.Content[i+1].ShortTag() == nullTag && (mapIsNew || !out.MapIndex(k).IsValid()) { out.SetMapIndex(k, e) } } diff --git a/vendor/gopkg.in/yaml.v3/emitterc.go b/vendor/gopkg.in/yaml.v3/emitterc.go index c29217e..0f47c9c 100644 --- a/vendor/gopkg.in/yaml.v3/emitterc.go +++ b/vendor/gopkg.in/yaml.v3/emitterc.go @@ -814,26 +814,24 @@ func yaml_emitter_emit_block_mapping_value(emitter *yaml_emitter_t, event *yaml_ } } if len(emitter.key_line_comment) > 0 { - // [Go] A line comment was previously provided for the key. Handle it before - // the value so the inline comments are placed correctly. - if yaml_emitter_silent_nil_event(emitter, event) && len(emitter.line_comment) == 0 { - // Nothing other than the line comment will be written on the line. - emitter.line_comment = emitter.key_line_comment - emitter.key_line_comment = nil - } else { - // An actual value is coming, so emit the comment line. + // [Go] Line comments are generally associated with the value, but when there's + // no value on the same line as a mapping key they end up attached to the + // key itself. + if event.typ == yaml_SCALAR_EVENT { + if len(emitter.line_comment) == 0 { + // A scalar is coming and it has no line comments by itself yet, + // so just let it handle the line comment as usual. If it has a + // line comment, we can't have both so the one from the key is lost. + emitter.line_comment = emitter.key_line_comment + emitter.key_line_comment = nil + } + } else if event.sequence_style() != yaml_FLOW_SEQUENCE_STYLE && (event.typ == yaml_MAPPING_START_EVENT || event.typ == yaml_SEQUENCE_START_EVENT) { + // An indented block follows, so write the comment right now. emitter.line_comment, emitter.key_line_comment = emitter.key_line_comment, emitter.line_comment if !yaml_emitter_process_line_comment(emitter) { return false } emitter.line_comment, emitter.key_line_comment = emitter.key_line_comment, emitter.line_comment - // Indent in unless it's a block that will reindent anyway. - if event.sequence_style() == yaml_FLOW_SEQUENCE_STYLE || (event.typ != yaml_MAPPING_START_EVENT && event.typ != yaml_SEQUENCE_START_EVENT) { - emitter.indent = emitter.best_indent*((emitter.indent+emitter.best_indent)/emitter.best_indent) - if !yaml_emitter_write_indent(emitter) { - return false - } - } } } emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_KEY_STATE) @@ -1896,7 +1894,7 @@ func yaml_emitter_write_literal_scalar(emitter *yaml_emitter_t, value []byte) bo if !yaml_emitter_write_block_scalar_hints(emitter, value) { return false } - if !put_break(emitter) { + if !yaml_emitter_process_line_comment(emitter) { return false } //emitter.indention = true @@ -1933,10 +1931,10 @@ func yaml_emitter_write_folded_scalar(emitter *yaml_emitter_t, value []byte) boo if !yaml_emitter_write_block_scalar_hints(emitter, value) { return false } - - if !put_break(emitter) { + if !yaml_emitter_process_line_comment(emitter) { return false } + //emitter.indention = true emitter.whitespace = true diff --git a/vendor/gopkg.in/yaml.v3/encode.go b/vendor/gopkg.in/yaml.v3/encode.go index 45e8d1e..de9e72a 100644 --- a/vendor/gopkg.in/yaml.v3/encode.go +++ b/vendor/gopkg.in/yaml.v3/encode.go @@ -120,6 +120,11 @@ func (e *encoder) marshal(tag string, in reflect.Value) { e.nodev(in) return case Node: + if !in.CanAddr() { + var n = reflect.New(in.Type()).Elem() + n.Set(in) + in = n + } e.nodev(in.Addr()) return case time.Time: diff --git a/vendor/gopkg.in/yaml.v3/scannerc.go b/vendor/gopkg.in/yaml.v3/scannerc.go index d9a539c..ca00701 100644 --- a/vendor/gopkg.in/yaml.v3/scannerc.go +++ b/vendor/gopkg.in/yaml.v3/scannerc.go @@ -2260,10 +2260,9 @@ func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, l } } if parser.buffer[parser.buffer_pos] == '#' { - // TODO Test this and then re-enable it. - //if !yaml_parser_scan_line_comment(parser, start_mark) { - // return false - //} + if !yaml_parser_scan_line_comment(parser, start_mark) { + return false + } for !is_breakz(parser.buffer, parser.buffer_pos) { skip(parser) if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { @@ -2892,6 +2891,10 @@ func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) boo var token_mark = token.start_mark var start_mark yaml_mark_t + var next_indent = parser.indent + if next_indent < 0 { + next_indent = 0 + } var recent_empty = false var first_empty = parser.newlines <= 1 @@ -2923,15 +2926,18 @@ func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) boo continue } c := parser.buffer[parser.buffer_pos+peek] - if is_breakz(parser.buffer, parser.buffer_pos+peek) || parser.flow_level > 0 && (c == ']' || c == '}') { + var close_flow = parser.flow_level > 0 && (c == ']' || c == '}') + if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) { // Got line break or terminator. - if !recent_empty { - if first_empty && (start_mark.line == foot_line || start_mark.column-1 < parser.indent) { + if close_flow || !recent_empty { + if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) { // This is the first empty line and there were no empty lines before, // so this initial part of the comment is a foot of the prior token // instead of being a head for the following one. Split it up. + // Alternatively, this might also be the last comment inside a flow + // scope, so it must be a footer. if len(text) > 0 { - if start_mark.column-1 < parser.indent { + if start_mark.column-1 < next_indent { // If dedented it's unrelated to the prior token. token_mark = start_mark } @@ -2962,7 +2968,7 @@ func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) boo continue } - if len(text) > 0 && column < parser.indent+1 && column != start_mark.column { + if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) { // The comment at the different indentation is a foot of the // preceding data rather than a head of the upcoming one. parser.comments = append(parser.comments, yaml_comment_t{ @@ -3013,6 +3019,10 @@ func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) boo peek = 0 column = 0 line = parser.mark.line + next_indent = parser.indent + if next_indent < 0 { + next_indent = 0 + } } if len(text) > 0 { diff --git a/vendor/gopkg.in/yaml.v3/yaml.go b/vendor/gopkg.in/yaml.v3/yaml.go index 56e8a84..8cec6da 100644 --- a/vendor/gopkg.in/yaml.v3/yaml.go +++ b/vendor/gopkg.in/yaml.v3/yaml.go @@ -449,6 +449,11 @@ func (n *Node) ShortTag() string { case ScalarNode: tag, _ := resolve("", n.Value) return tag + case 0: + // Special case to make the zero value convenient. + if n.IsZero() { + return nullTag + } } return "" } diff --git a/vendor/modules.txt b/vendor/modules.txt index f3dcd53..a0c4611 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,12 +1,11 @@ -# github.com/andybalholm/brotli v1.0.0 +# github.com/andybalholm/brotli v1.0.4 ## explicit; go 1.12 github.com/andybalholm/brotli -# github.com/bitrise-io/go-utils v0.0.0-20200224122728-e212188d99b4 +# github.com/bitrise-io/go-utils v1.0.2 ## explicit; go 1.13 github.com/bitrise-io/go-utils/colorstring github.com/bitrise-io/go-utils/command github.com/bitrise-io/go-utils/command/git -github.com/bitrise-io/go-utils/errorutil github.com/bitrise-io/go-utils/fileutil github.com/bitrise-io/go-utils/log github.com/bitrise-io/go-utils/parseutil @@ -30,41 +29,43 @@ github.com/dsnet/compress/bzip2/internal/sais github.com/dsnet/compress/internal github.com/dsnet/compress/internal/errors github.com/dsnet/compress/internal/prefix -# github.com/golang/snappy v0.0.1 +# github.com/golang/snappy v0.0.4 ## explicit github.com/golang/snappy -# github.com/klauspost/compress v1.10.10 -## explicit; go 1.13 +# github.com/klauspost/compress v1.15.4 +## explicit; go 1.16 +github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse github.com/klauspost/compress/huff0 -github.com/klauspost/compress/snappy +github.com/klauspost/compress/internal/cpuinfo +github.com/klauspost/compress/internal/snapref github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd/internal/xxhash -# github.com/klauspost/pgzip v1.2.4 +# github.com/klauspost/pgzip v1.2.5 ## explicit github.com/klauspost/pgzip -# github.com/magiconair/properties v1.8.2-0.20200610093828-cfba716d4c41 +# github.com/magiconair/properties v1.8.6 ## explicit; go 1.13 github.com/magiconair/properties # github.com/mholt/archiver v1.1.3-0.20190917152942-233fc16b9615 ## explicit; go 1.12 github.com/mholt/archiver -# github.com/nwaples/rardecode v1.1.0 +# github.com/nwaples/rardecode v1.1.3 ## explicit github.com/nwaples/rardecode -# github.com/pierrec/lz4 v2.5.2+incompatible +# github.com/pierrec/lz4 v2.6.1+incompatible ## explicit github.com/pierrec/lz4 github.com/pierrec/lz4/internal/xxh32 # github.com/pmezard/go-difflib v1.0.0 ## explicit github.com/pmezard/go-difflib/difflib -# github.com/stretchr/testify v1.6.1 +# github.com/stretchr/testify v1.7.0 ## explicit; go 1.13 github.com/stretchr/testify/assert github.com/stretchr/testify/require -# github.com/ulikunitz/xz v0.5.7 +# github.com/ulikunitz/xz v0.5.10 ## explicit; go 1.12 github.com/ulikunitz/xz github.com/ulikunitz/xz/internal/hash @@ -73,6 +74,6 @@ github.com/ulikunitz/xz/lzma # github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 ## explicit github.com/xi2/xz -# gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 +# gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b ## explicit gopkg.in/yaml.v3