diff --git a/tree.go b/engine.go similarity index 100% rename from tree.go rename to engine.go diff --git a/tree_art.go b/engine_art.go similarity index 100% rename from tree_art.go rename to engine_art.go diff --git a/tree_null.go b/engine_null.go similarity index 100% rename from tree_null.go rename to engine_null.go diff --git a/engine_stringmap.go b/engine_stringmap.go index 2776744..f7a48f5 100644 --- a/engine_stringmap.go +++ b/engine_stringmap.go @@ -3,8 +3,10 @@ package expr import ( "context" "fmt" + "strconv" "sync" + "github.com/cespare/xxhash/v2" "github.com/google/cel-go/common/operators" "github.com/ohler55/ojg/jp" "golang.org/x/sync/errgroup" @@ -83,7 +85,16 @@ func (n *stringLookup) Search(ctx context.Context, variable string, input any) [ if !ok { return nil } - return n.strings[str] + return n.strings[n.hash(str)] +} + +// hash hashes strings quickly via xxhash. this provides a _somewhat_ collision-free +// lookup while reducing memory for strings. note that internally, go maps store the +// raw key as a string, which uses extra memory. by compressing all strings via this +// hash, memory usage grows predictably even with long strings. +func (n *stringLookup) hash(input string) string { + ui := xxhash.Sum64String(input) + return strconv.FormatUint(ui, 36) } func (n *stringLookup) Add(ctx context.Context, p ExpressionPart) error { @@ -93,7 +104,7 @@ func (n *stringLookup) Add(ctx context.Context, p ExpressionPart) error { n.lock.Lock() defer n.lock.Unlock() - val := p.Predicate.LiteralAsString() + val := n.hash(p.Predicate.LiteralAsString()) n.vars[p.Predicate.Ident] = struct{}{} @@ -114,7 +125,7 @@ func (n *stringLookup) Remove(ctx context.Context, p ExpressionPart) error { n.lock.Lock() defer n.lock.Unlock() - val := p.Predicate.LiteralAsString() + val := n.hash(p.Predicate.LiteralAsString()) coll, ok := n.strings[val] if !ok { diff --git a/engine_stringmap_test.go b/engine_stringmap_test.go index ae8ffc9..3b4d3c1 100644 --- a/engine_stringmap_test.go +++ b/engine_stringmap_test.go @@ -42,7 +42,7 @@ func TestEngineStringmap(t *testing.T) { }, }) require.NoError(t, err) - require.Equal(t, 2, len(s.strings["123"])) + require.Equal(t, 2, len(s.strings[s.hash("123")])) }) }) diff --git a/go.mod b/go.mod index 93632d9..ede4383 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/inngest/expr go 1.21.0 require ( + github.com/cespare/xxhash/v2 v2.2.0 github.com/google/cel-go v0.18.2 github.com/karlseguin/ccache/v2 v2.0.8 github.com/ohler55/ojg v1.21.0 diff --git a/go.sum b/go.sum index 8533854..e327a79 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/vendor/github.com/cespare/xxhash/v2/LICENSE.txt b/vendor/github.com/cespare/xxhash/v2/LICENSE.txt new file mode 100644 index 0000000..24b5306 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/cespare/xxhash/v2/README.md b/vendor/github.com/cespare/xxhash/v2/README.md new file mode 100644 index 0000000..8bf0e5b --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/README.md @@ -0,0 +1,72 @@ +# xxhash + +[![Go Reference](https://pkg.go.dev/badge/github.com/cespare/xxhash/v2.svg)](https://pkg.go.dev/github.com/cespare/xxhash/v2) +[![Test](https://github.com/cespare/xxhash/actions/workflows/test.yml/badge.svg)](https://github.com/cespare/xxhash/actions/workflows/test.yml) + +xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a +high-quality hashing algorithm that is much faster than anything in the Go +standard library. + +This package provides a straightforward API: + +``` +func Sum64(b []byte) uint64 +func Sum64String(s string) uint64 +type Digest struct{ ... } + func New() *Digest +``` + +The `Digest` type implements hash.Hash64. Its key methods are: + +``` +func (*Digest) Write([]byte) (int, error) +func (*Digest) WriteString(string) (int, error) +func (*Digest) Sum64() uint64 +``` + +The package is written with optimized pure Go and also contains even faster +assembly implementations for amd64 and arm64. If desired, the `purego` build tag +opts into using the Go code even on those architectures. + +[xxHash]: http://cyan4973.github.io/xxHash/ + +## Compatibility + +This package is in a module and the latest code is in version 2 of the module. +You need a version of Go with at least "minimal module compatibility" to use +github.com/cespare/xxhash/v2: + +* 1.9.7+ for Go 1.9 +* 1.10.3+ for Go 1.10 +* Go 1.11 or later + +I recommend using the latest release of Go. + +## Benchmarks + +Here are some quick benchmarks comparing the pure-Go and assembly +implementations of Sum64. + +| input size | purego | asm | +| ---------- | --------- | --------- | +| 4 B | 1.3 GB/s | 1.2 GB/s | +| 16 B | 2.9 GB/s | 3.5 GB/s | +| 100 B | 6.9 GB/s | 8.1 GB/s | +| 4 KB | 11.7 GB/s | 16.7 GB/s | +| 10 MB | 12.0 GB/s | 17.3 GB/s | + +These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C +CPU using the following commands under Go 1.19.2: + +``` +benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') +benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') +``` + +## Projects using this package + +- [InfluxDB](https://github.com/influxdata/influxdb) +- [Prometheus](https://github.com/prometheus/prometheus) +- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) +- [FreeCache](https://github.com/coocood/freecache) +- [FastCache](https://github.com/VictoriaMetrics/fastcache) diff --git a/vendor/github.com/cespare/xxhash/v2/testall.sh b/vendor/github.com/cespare/xxhash/v2/testall.sh new file mode 100644 index 0000000..94b9c44 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/testall.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -eu -o pipefail + +# Small convenience script for running the tests with various combinations of +# arch/tags. This assumes we're running on amd64 and have qemu available. + +go test ./... +go test -tags purego ./... +GOARCH=arm64 go test +GOARCH=arm64 go test -tags purego diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash.go b/vendor/github.com/cespare/xxhash/v2/xxhash.go new file mode 100644 index 0000000..a9e0d45 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash.go @@ -0,0 +1,228 @@ +// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described +// at http://cyan4973.github.io/xxHash/. +package xxhash + +import ( + "encoding/binary" + "errors" + "math/bits" +) + +const ( + prime1 uint64 = 11400714785074694791 + prime2 uint64 = 14029467366897019727 + prime3 uint64 = 1609587929392839161 + prime4 uint64 = 9650029242287828579 + prime5 uint64 = 2870177450012600261 +) + +// Store the primes in an array as well. +// +// The consts are used when possible in Go code to avoid MOVs but we need a +// contiguous array of the assembly code. +var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} + +// Digest implements hash.Hash64. +type Digest struct { + v1 uint64 + v2 uint64 + v3 uint64 + v4 uint64 + total uint64 + mem [32]byte + n int // how much of mem is used +} + +// New creates a new Digest that computes the 64-bit xxHash algorithm. +func New() *Digest { + var d Digest + d.Reset() + return &d +} + +// Reset clears the Digest's state so that it can be reused. +func (d *Digest) Reset() { + d.v1 = primes[0] + prime2 + d.v2 = prime2 + d.v3 = 0 + d.v4 = -primes[0] + d.total = 0 + d.n = 0 +} + +// Size always returns 8 bytes. +func (d *Digest) Size() int { return 8 } + +// BlockSize always returns 32 bytes. +func (d *Digest) BlockSize() int { return 32 } + +// Write adds more data to d. It always returns len(b), nil. +func (d *Digest) Write(b []byte) (n int, err error) { + n = len(b) + d.total += uint64(n) + + memleft := d.mem[d.n&(len(d.mem)-1):] + + if d.n+n < 32 { + // This new data doesn't even fill the current block. + copy(memleft, b) + d.n += n + return + } + + if d.n > 0 { + // Finish off the partial block. + c := copy(memleft, b) + d.v1 = round(d.v1, u64(d.mem[0:8])) + d.v2 = round(d.v2, u64(d.mem[8:16])) + d.v3 = round(d.v3, u64(d.mem[16:24])) + d.v4 = round(d.v4, u64(d.mem[24:32])) + b = b[c:] + d.n = 0 + } + + if len(b) >= 32 { + // One or more full blocks left. + nw := writeBlocks(d, b) + b = b[nw:] + } + + // Store any remaining partial block. + copy(d.mem[:], b) + d.n = len(b) + + return +} + +// Sum appends the current hash to b and returns the resulting slice. +func (d *Digest) Sum(b []byte) []byte { + s := d.Sum64() + return append( + b, + byte(s>>56), + byte(s>>48), + byte(s>>40), + byte(s>>32), + byte(s>>24), + byte(s>>16), + byte(s>>8), + byte(s), + ) +} + +// Sum64 returns the current hash. +func (d *Digest) Sum64() uint64 { + var h uint64 + + if d.total >= 32 { + v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = d.v3 + prime5 + } + + h += d.total + + b := d.mem[:d.n&(len(d.mem)-1)] + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) + h ^= k1 + h = rol27(h)*prime1 + prime4 + } + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 + h = rol23(h)*prime2 + prime3 + b = b[4:] + } + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 + h = rol11(h) * prime1 + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return h +} + +const ( + magic = "xxh\x06" + marshaledSize = len(magic) + 8*5 + 32 +) + +// MarshalBinary implements the encoding.BinaryMarshaler interface. +func (d *Digest) MarshalBinary() ([]byte, error) { + b := make([]byte, 0, marshaledSize) + b = append(b, magic...) + b = appendUint64(b, d.v1) + b = appendUint64(b, d.v2) + b = appendUint64(b, d.v3) + b = appendUint64(b, d.v4) + b = appendUint64(b, d.total) + b = append(b, d.mem[:d.n]...) + b = b[:len(b)+len(d.mem)-d.n] + return b, nil +} + +// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. +func (d *Digest) UnmarshalBinary(b []byte) error { + if len(b) < len(magic) || string(b[:len(magic)]) != magic { + return errors.New("xxhash: invalid hash state identifier") + } + if len(b) != marshaledSize { + return errors.New("xxhash: invalid hash state size") + } + b = b[len(magic):] + b, d.v1 = consumeUint64(b) + b, d.v2 = consumeUint64(b) + b, d.v3 = consumeUint64(b) + b, d.v4 = consumeUint64(b) + b, d.total = consumeUint64(b) + copy(d.mem[:], b) + d.n = int(d.total % uint64(len(d.mem))) + return nil +} + +func appendUint64(b []byte, x uint64) []byte { + var a [8]byte + binary.LittleEndian.PutUint64(a[:], x) + return append(b, a[:]...) +} + +func consumeUint64(b []byte) ([]byte, uint64) { + x := u64(b) + return b[8:], x +} + +func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } +func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } + +func round(acc, input uint64) uint64 { + acc += input * prime2 + acc = rol31(acc) + acc *= prime1 + return acc +} + +func mergeRound(acc, val uint64) uint64 { + val = round(0, val) + acc ^= val + acc = acc*prime1 + prime4 + return acc +} + +func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } +func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } +func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } +func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } +func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } +func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } +func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } +func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s new file mode 100644 index 0000000..3e8b132 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s @@ -0,0 +1,209 @@ +//go:build !appengine && gc && !purego +// +build !appengine +// +build gc +// +build !purego + +#include "textflag.h" + +// Registers: +#define h AX +#define d AX +#define p SI // pointer to advance through b +#define n DX +#define end BX // loop end +#define v1 R8 +#define v2 R9 +#define v3 R10 +#define v4 R11 +#define x R12 +#define prime1 R13 +#define prime2 R14 +#define prime4 DI + +#define round(acc, x) \ + IMULQ prime2, x \ + ADDQ x, acc \ + ROLQ $31, acc \ + IMULQ prime1, acc + +// round0 performs the operation x = round(0, x). +#define round0(x) \ + IMULQ prime2, x \ + ROLQ $31, x \ + IMULQ prime1, x + +// mergeRound applies a merge round on the two registers acc and x. +// It assumes that prime1, prime2, and prime4 have been loaded. +#define mergeRound(acc, x) \ + round0(x) \ + XORQ x, acc \ + IMULQ prime1, acc \ + ADDQ prime4, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that there is at least one block +// to process. +#define blockLoop() \ +loop: \ + MOVQ +0(p), x \ + round(v1, x) \ + MOVQ +8(p), x \ + round(v2, x) \ + MOVQ +16(p), x \ + round(v3, x) \ + MOVQ +24(p), x \ + round(v4, x) \ + ADDQ $32, p \ + CMPQ p, end \ + JLE loop + +// func Sum64(b []byte) uint64 +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 + // Load fixed primes. + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 + MOVQ ·primes+24(SB), prime4 + + // Load slice. + MOVQ b_base+0(FP), p + MOVQ b_len+8(FP), n + LEAQ (p)(n*1), end + + // The first loop limit will be len(b)-32. + SUBQ $32, end + + // Check whether we have at least one block. + CMPQ n, $32 + JLT noBlocks + + // Set up initial state (v1, v2, v3, v4). + MOVQ prime1, v1 + ADDQ prime2, v1 + MOVQ prime2, v2 + XORQ v3, v3 + XORQ v4, v4 + SUBQ prime1, v4 + + blockLoop() + + MOVQ v1, h + ROLQ $1, h + MOVQ v2, x + ROLQ $7, x + ADDQ x, h + MOVQ v3, x + ROLQ $12, x + ADDQ x, h + MOVQ v4, x + ROLQ $18, x + ADDQ x, h + + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) + + JMP afterBlocks + +noBlocks: + MOVQ ·primes+32(SB), h + +afterBlocks: + ADDQ n, h + + ADDQ $24, end + CMPQ p, end + JG try4 + +loop8: + MOVQ (p), x + ADDQ $8, p + round0(x) + XORQ x, h + ROLQ $27, h + IMULQ prime1, h + ADDQ prime4, h + + CMPQ p, end + JLE loop8 + +try4: + ADDQ $4, end + CMPQ p, end + JG try1 + + MOVL (p), x + ADDQ $4, p + IMULQ prime1, x + XORQ x, h + + ROLQ $23, h + IMULQ prime2, h + ADDQ ·primes+16(SB), h + +try1: + ADDQ $4, end + CMPQ p, end + JGE finalize + +loop1: + MOVBQZX (p), x + ADDQ $1, p + IMULQ ·primes+32(SB), x + XORQ x, h + ROLQ $11, h + IMULQ prime1, h + + CMPQ p, end + JL loop1 + +finalize: + MOVQ h, x + SHRQ $33, x + XORQ x, h + IMULQ prime2, h + MOVQ h, x + SHRQ $29, x + XORQ x, h + IMULQ ·primes+16(SB), h + MOVQ h, x + SHRQ $32, x + XORQ x, h + + MOVQ h, ret+24(FP) + RET + +// func writeBlocks(d *Digest, b []byte) int +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 + // Load fixed primes needed for round. + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 + + // Load slice. + MOVQ b_base+8(FP), p + MOVQ b_len+16(FP), n + LEAQ (p)(n*1), end + SUBQ $32, end + + // Load vN from d. + MOVQ s+0(FP), d + MOVQ 0(d), v1 + MOVQ 8(d), v2 + MOVQ 16(d), v3 + MOVQ 24(d), v4 + + // We don't need to check the loop condition here; this function is + // always called with at least one block of data to process. + blockLoop() + + // Copy vN back to d. + MOVQ v1, 0(d) + MOVQ v2, 8(d) + MOVQ v3, 16(d) + MOVQ v4, 24(d) + + // The number of bytes written is p minus the old base pointer. + SUBQ b_base+8(FP), p + MOVQ p, ret+32(FP) + + RET diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s b/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s new file mode 100644 index 0000000..7e3145a --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s @@ -0,0 +1,183 @@ +//go:build !appengine && gc && !purego +// +build !appengine +// +build gc +// +build !purego + +#include "textflag.h" + +// Registers: +#define digest R1 +#define h R2 // return value +#define p R3 // input pointer +#define n R4 // input length +#define nblocks R5 // n / 32 +#define prime1 R7 +#define prime2 R8 +#define prime3 R9 +#define prime4 R10 +#define prime5 R11 +#define v1 R12 +#define v2 R13 +#define v3 R14 +#define v4 R15 +#define x1 R20 +#define x2 R21 +#define x3 R22 +#define x4 R23 + +#define round(acc, x) \ + MADD prime2, acc, x, acc \ + ROR $64-31, acc \ + MUL prime1, acc + +// round0 performs the operation x = round(0, x). +#define round0(x) \ + MUL prime2, x \ + ROR $64-31, x \ + MUL prime1, x + +#define mergeRound(acc, x) \ + round0(x) \ + EOR x, acc \ + MADD acc, prime4, prime1, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that n >= 32. +#define blockLoop() \ + LSR $5, n, nblocks \ + PCALIGN $16 \ + loop: \ + LDP.P 16(p), (x1, x2) \ + LDP.P 16(p), (x3, x4) \ + round(v1, x1) \ + round(v2, x2) \ + round(v3, x3) \ + round(v4, x4) \ + SUB $1, nblocks \ + CBNZ nblocks, loop + +// func Sum64(b []byte) uint64 +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 + LDP b_base+0(FP), (p, n) + + LDP ·primes+0(SB), (prime1, prime2) + LDP ·primes+16(SB), (prime3, prime4) + MOVD ·primes+32(SB), prime5 + + CMP $32, n + CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } + BLT afterLoop + + ADD prime1, prime2, v1 + MOVD prime2, v2 + MOVD $0, v3 + NEG prime1, v4 + + blockLoop() + + ROR $64-1, v1, x1 + ROR $64-7, v2, x2 + ADD x1, x2 + ROR $64-12, v3, x3 + ROR $64-18, v4, x4 + ADD x3, x4 + ADD x2, x4, h + + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) + +afterLoop: + ADD n, h + + TBZ $4, n, try8 + LDP.P 16(p), (x1, x2) + + round0(x1) + + // NOTE: here and below, sequencing the EOR after the ROR (using a + // rotated register) is worth a small but measurable speedup for small + // inputs. + ROR $64-27, h + EOR x1 @> 64-27, h, h + MADD h, prime4, prime1, h + + round0(x2) + ROR $64-27, h + EOR x2 @> 64-27, h, h + MADD h, prime4, prime1, h + +try8: + TBZ $3, n, try4 + MOVD.P 8(p), x1 + + round0(x1) + ROR $64-27, h + EOR x1 @> 64-27, h, h + MADD h, prime4, prime1, h + +try4: + TBZ $2, n, try2 + MOVWU.P 4(p), x2 + + MUL prime1, x2 + ROR $64-23, h + EOR x2 @> 64-23, h, h + MADD h, prime3, prime2, h + +try2: + TBZ $1, n, try1 + MOVHU.P 2(p), x3 + AND $255, x3, x1 + LSR $8, x3, x2 + + MUL prime5, x1 + ROR $64-11, h + EOR x1 @> 64-11, h, h + MUL prime1, h + + MUL prime5, x2 + ROR $64-11, h + EOR x2 @> 64-11, h, h + MUL prime1, h + +try1: + TBZ $0, n, finalize + MOVBU (p), x4 + + MUL prime5, x4 + ROR $64-11, h + EOR x4 @> 64-11, h, h + MUL prime1, h + +finalize: + EOR h >> 33, h + MUL prime2, h + EOR h >> 29, h + MUL prime3, h + EOR h >> 32, h + + MOVD h, ret+24(FP) + RET + +// func writeBlocks(d *Digest, b []byte) int +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 + LDP ·primes+0(SB), (prime1, prime2) + + // Load state. Assume v[1-4] are stored contiguously. + MOVD d+0(FP), digest + LDP 0(digest), (v1, v2) + LDP 16(digest), (v3, v4) + + LDP b_base+8(FP), (p, n) + + blockLoop() + + // Store updated state. + STP (v1, v2), 0(digest) + STP (v3, v4), 16(digest) + + BIC $31, n + MOVD n, ret+32(FP) + RET diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go new file mode 100644 index 0000000..9216e0a --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go @@ -0,0 +1,15 @@ +//go:build (amd64 || arm64) && !appengine && gc && !purego +// +build amd64 arm64 +// +build !appengine +// +build gc +// +build !purego + +package xxhash + +// Sum64 computes the 64-bit xxHash digest of b. +// +//go:noescape +func Sum64(b []byte) uint64 + +//go:noescape +func writeBlocks(d *Digest, b []byte) int diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go new file mode 100644 index 0000000..26df13b --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go @@ -0,0 +1,76 @@ +//go:build (!amd64 && !arm64) || appengine || !gc || purego +// +build !amd64,!arm64 appengine !gc purego + +package xxhash + +// Sum64 computes the 64-bit xxHash digest of b. +func Sum64(b []byte) uint64 { + // A simpler version would be + // d := New() + // d.Write(b) + // return d.Sum64() + // but this is faster, particularly for small inputs. + + n := len(b) + var h uint64 + + if n >= 32 { + v1 := primes[0] + prime2 + v2 := prime2 + v3 := uint64(0) + v4 := -primes[0] + for len(b) >= 32 { + v1 = round(v1, u64(b[0:8:len(b)])) + v2 = round(v2, u64(b[8:16:len(b)])) + v3 = round(v3, u64(b[16:24:len(b)])) + v4 = round(v4, u64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] + } + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = prime5 + } + + h += uint64(n) + + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) + h ^= k1 + h = rol27(h)*prime1 + prime4 + } + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 + h = rol23(h)*prime2 + prime3 + b = b[4:] + } + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 + h = rol11(h) * prime1 + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return h +} + +func writeBlocks(d *Digest, b []byte) int { + v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 + n := len(b) + for len(b) >= 32 { + v1 = round(v1, u64(b[0:8:len(b)])) + v2 = round(v2, u64(b[8:16:len(b)])) + v3 = round(v3, u64(b[16:24:len(b)])) + v4 = round(v4, u64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] + } + d.v1, d.v2, d.v3, d.v4 = v1, v2, v3, v4 + return n - len(b) +} diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go new file mode 100644 index 0000000..e86f1b5 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go @@ -0,0 +1,16 @@ +//go:build appengine +// +build appengine + +// This file contains the safe implementations of otherwise unsafe-using code. + +package xxhash + +// Sum64String computes the 64-bit xxHash digest of s. +func Sum64String(s string) uint64 { + return Sum64([]byte(s)) +} + +// WriteString adds more data to d. It always returns len(s), nil. +func (d *Digest) WriteString(s string) (n int, err error) { + return d.Write([]byte(s)) +} diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go new file mode 100644 index 0000000..1c1638f --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go @@ -0,0 +1,58 @@ +//go:build !appengine +// +build !appengine + +// This file encapsulates usage of unsafe. +// xxhash_safe.go contains the safe implementations. + +package xxhash + +import ( + "unsafe" +) + +// In the future it's possible that compiler optimizations will make these +// XxxString functions unnecessary by realizing that calls such as +// Sum64([]byte(s)) don't need to copy s. See https://go.dev/issue/2205. +// If that happens, even if we keep these functions they can be replaced with +// the trivial safe code. + +// NOTE: The usual way of doing an unsafe string-to-[]byte conversion is: +// +// var b []byte +// bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) +// bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data +// bh.Len = len(s) +// bh.Cap = len(s) +// +// Unfortunately, as of Go 1.15.3 the inliner's cost model assigns a high enough +// weight to this sequence of expressions that any function that uses it will +// not be inlined. Instead, the functions below use a different unsafe +// conversion designed to minimize the inliner weight and allow both to be +// inlined. There is also a test (TestInlining) which verifies that these are +// inlined. +// +// See https://github.com/golang/go/issues/42739 for discussion. + +// Sum64String computes the 64-bit xxHash digest of s. +// It may be faster than Sum64([]byte(s)) by avoiding a copy. +func Sum64String(s string) uint64 { + b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)})) + return Sum64(b) +} + +// WriteString adds more data to d. It always returns len(s), nil. +// It may be faster than Write([]byte(s)) by avoiding a copy. +func (d *Digest) WriteString(s string) (n int, err error) { + d.Write(*(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))) + // d.Write always returns len(s), nil. + // Ignoring the return output and returning these fixed values buys a + // savings of 6 in the inliner's cost model. + return len(s), nil +} + +// sliceHeader is similar to reflect.SliceHeader, but it assumes that the layout +// of the first two words is the same as the layout of a string. +type sliceHeader struct { + s string + cap int +} diff --git a/vendor/modules.txt b/vendor/modules.txt index d791e94..88e3431 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,6 +1,9 @@ # github.com/antlr4-go/antlr/v4 v4.13.0 ## explicit; go 1.20 github.com/antlr4-go/antlr/v4 +# github.com/cespare/xxhash/v2 v2.2.0 +## explicit; go 1.11 +github.com/cespare/xxhash/v2 # github.com/davecgh/go-spew v1.1.1 ## explicit github.com/davecgh/go-spew/spew