From f912e54bac00d84e3bb6d2f9292610ab1b8a471f Mon Sep 17 00:00:00 2001 From: "cluster-stack-bot[bot]" <143188378+cluster-stack-bot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:18:21 +0000 Subject: [PATCH] :seedling: Update Update Golang Dependencies group | datasource | package | from | to | | ---------- | ------------------------------------ | ------- | ------- | | go | github.com/SovereignCloudStack/csctl | v0.0.3 | v0.0.4 | | go | github.com/gophercloud/gophercloud | v1.14.0 | v1.14.1 | | go | github.com/minio/minio-go/v7 | v7.0.76 | v7.0.83 | --- go.mod | 24 +- go.sum | 41 +- .../csctl/pkg/clusterstack/config.go | 12 + .../csctl/pkg/hash/hash.go | 16 + .../goccy/go-json/internal/decoder/compile.go | 18 +- .../internal/decoder/compile_norace.go | 1 + .../go-json/internal/decoder/compile_race.go | 1 + .../go-json/internal/encoder/compiler.go | 16 +- .../internal/encoder/compiler_norace.go | 1 + .../go-json/internal/encoder/compiler_race.go | 1 + .../goccy/go-json/internal/encoder/encoder.go | 5 + .../gophercloud/gophercloud/CHANGELOG.md | 4 + .../gophercloud/provider_client.go | 2 +- .../klauspost/compress/s2/encode.go | 25 +- .../klauspost/compress/s2/encode_amd64.go | 201 +- .../klauspost/compress/s2/encode_go.go | 4 +- .../compress/s2/encodeblock_amd64.go | 44 +- .../klauspost/compress/s2/encodeblock_amd64.s | 21920 ++++++++-------- .../klauspost/compress/s2/writer.go | 31 +- .../github.com/klauspost/cpuid/v2/README.md | 1 + vendor/github.com/klauspost/cpuid/v2/cpuid.go | 84 +- .../klauspost/cpuid/v2/cpuid_arm64.s | 10 + .../klauspost/cpuid/v2/detect_arm64.go | 3 +- .../klauspost/cpuid/v2/detect_ref.go | 2 + .../klauspost/cpuid/v2/detect_x86.go | 3 + .../klauspost/cpuid/v2/featureid_string.go | 440 +- vendor/github.com/minio/minio-go/v7/Makefile | 4 + vendor/github.com/minio/minio-go/v7/README.md | 2 +- .../minio/minio-go/v7/api-datatypes.go | 18 +- .../minio/minio-go/v7/api-get-object.go | 4 +- .../minio/minio-go/v7/api-presigned.go | 2 +- .../minio/minio-go/v7/api-prompt-object.go | 78 + .../minio/minio-go/v7/api-prompt-options.go | 84 + .../minio-go/v7/api-put-object-fan-out.go | 5 +- .../minio-go/v7/api-put-object-multipart.go | 48 +- .../minio-go/v7/api-put-object-streaming.go | 150 +- .../minio/minio-go/v7/api-put-object.go | 60 +- .../minio-go/v7/api-putobject-snowball.go | 2 +- .../minio/minio-go/v7/api-s3-datatypes.go | 70 +- vendor/github.com/minio/minio-go/v7/api.go | 30 +- .../minio/minio-go/v7/bucket-cache.go | 2 +- .../github.com/minio/minio-go/v7/checksum.go | 210 +- .../minio/minio-go/v7/functional_tests.go | 2199 +- .../v7/pkg/credentials/assume_role.go | 43 +- .../minio-go/v7/pkg/credentials/chain.go | 18 + .../v7/pkg/credentials/credentials.go | 48 +- .../minio-go/v7/pkg/credentials/env_aws.go | 13 +- .../minio-go/v7/pkg/credentials/env_minio.go | 13 +- .../pkg/credentials/file_aws_credentials.go | 15 +- .../v7/pkg/credentials/file_minio_client.go | 15 +- .../minio-go/v7/pkg/credentials/iam_aws.go | 47 +- .../minio-go/v7/pkg/credentials/static.go | 5 + .../v7/pkg/credentials/sts_client_grants.go | 42 +- .../v7/pkg/credentials/sts_custom_identity.go | 36 +- .../v7/pkg/credentials/sts_ldap_identity.go | 40 +- .../v7/pkg/credentials/sts_tls_identity.go | 100 +- .../v7/pkg/credentials/sts_web_identity.go | 95 +- .../minio-go/v7/pkg/lifecycle/lifecycle.go | 26 + .../minio/minio-go/v7/pkg/tags/tags.go | 2 +- .../minio/minio-go/v7/post-policy.go | 88 +- .../minio/minio-go/v7/retry-continous.go | 10 +- vendor/github.com/minio/minio-go/v7/retry.go | 16 +- vendor/github.com/minio/minio-go/v7/utils.go | 152 +- .../golang.org/x/crypto/argon2/blamka_amd64.s | 2972 ++- .../x/crypto/blake2b/blake2bAVX2_amd64.s | 5167 +++- .../x/crypto/blake2b/blake2b_amd64.s | 1681 +- .../golang.org/x/sys/cpu/asm_darwin_x86_gc.s | 17 + vendor/golang.org/x/sys/cpu/cpu.go | 19 + vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go | 61 + vendor/golang.org/x/sys/cpu/cpu_gc_x86.go | 4 +- .../x/sys/cpu/{cpu_x86.s => cpu_gc_x86.s} | 2 +- vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go | 6 - .../golang.org/x/sys/cpu/cpu_linux_arm64.go | 1 - .../golang.org/x/sys/cpu/cpu_linux_noinit.go | 2 +- .../golang.org/x/sys/cpu/cpu_linux_riscv64.go | 137 + vendor/golang.org/x/sys/cpu/cpu_other_x86.go | 11 + vendor/golang.org/x/sys/cpu/cpu_riscv64.go | 11 +- vendor/golang.org/x/sys/cpu/cpu_x86.go | 6 +- .../x/sys/cpu/syscall_darwin_x86_gc.go | 98 + vendor/golang.org/x/sys/unix/README.md | 2 +- vendor/golang.org/x/sys/unix/ioctl_linux.go | 96 + vendor/golang.org/x/sys/unix/mkerrors.sh | 17 +- vendor/golang.org/x/sys/unix/syscall_aix.go | 2 +- .../golang.org/x/sys/unix/syscall_darwin.go | 37 + vendor/golang.org/x/sys/unix/syscall_hurd.go | 1 + vendor/golang.org/x/sys/unix/syscall_linux.go | 64 +- .../x/sys/unix/syscall_linux_arm64.go | 2 + .../x/sys/unix/syscall_linux_loong64.go | 2 + .../x/sys/unix/syscall_linux_riscv64.go | 2 + .../x/sys/unix/syscall_zos_s390x.go | 104 +- .../golang.org/x/sys/unix/vgetrandom_linux.go | 13 + .../x/sys/unix/vgetrandom_unsupported.go | 11 + .../x/sys/unix/zerrors_darwin_amd64.go | 7 + .../x/sys/unix/zerrors_darwin_arm64.go | 7 + vendor/golang.org/x/sys/unix/zerrors_linux.go | 44 +- .../x/sys/unix/zerrors_linux_386.go | 25 + .../x/sys/unix/zerrors_linux_amd64.go | 25 + .../x/sys/unix/zerrors_linux_arm.go | 25 + .../x/sys/unix/zerrors_linux_arm64.go | 26 + .../x/sys/unix/zerrors_linux_loong64.go | 25 + .../x/sys/unix/zerrors_linux_mips.go | 25 + .../x/sys/unix/zerrors_linux_mips64.go | 25 + .../x/sys/unix/zerrors_linux_mips64le.go | 25 + .../x/sys/unix/zerrors_linux_mipsle.go | 25 + .../x/sys/unix/zerrors_linux_ppc.go | 25 + .../x/sys/unix/zerrors_linux_ppc64.go | 25 + .../x/sys/unix/zerrors_linux_ppc64le.go | 25 + .../x/sys/unix/zerrors_linux_riscv64.go | 25 + .../x/sys/unix/zerrors_linux_s390x.go | 25 + .../x/sys/unix/zerrors_linux_sparc64.go | 25 + .../x/sys/unix/zerrors_zos_s390x.go | 2 + .../x/sys/unix/zsyscall_darwin_amd64.go | 20 + .../x/sys/unix/zsyscall_darwin_amd64.s | 5 + .../x/sys/unix/zsyscall_darwin_arm64.go | 20 + .../x/sys/unix/zsyscall_darwin_arm64.s | 5 + .../golang.org/x/sys/unix/zsyscall_linux.go | 27 +- .../x/sys/unix/zsysnum_linux_amd64.go | 1 + .../x/sys/unix/zsysnum_linux_arm64.go | 2 +- .../x/sys/unix/zsysnum_linux_loong64.go | 2 + .../x/sys/unix/zsysnum_linux_riscv64.go | 2 +- .../x/sys/unix/ztypes_darwin_amd64.go | 73 + .../x/sys/unix/ztypes_darwin_arm64.go | 73 + .../x/sys/unix/ztypes_freebsd_386.go | 1 + .../x/sys/unix/ztypes_freebsd_amd64.go | 1 + .../x/sys/unix/ztypes_freebsd_arm.go | 1 + .../x/sys/unix/ztypes_freebsd_arm64.go | 1 + .../x/sys/unix/ztypes_freebsd_riscv64.go | 1 + vendor/golang.org/x/sys/unix/ztypes_linux.go | 224 +- .../x/sys/unix/ztypes_linux_riscv64.go | 33 + .../golang.org/x/sys/unix/ztypes_zos_s390x.go | 6 + vendor/modules.txt | 26 +- 131 files changed, 23788 insertions(+), 14422 deletions(-) create mode 100644 vendor/github.com/minio/minio-go/v7/api-prompt-object.go create mode 100644 vendor/github.com/minio/minio-go/v7/api-prompt-options.go create mode 100644 vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s create mode 100644 vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go rename vendor/golang.org/x/sys/cpu/{cpu_x86.s => cpu_gc_x86.s} (94%) create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_x86.go create mode 100644 vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_linux.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go diff --git a/go.mod b/go.mod index c936466..130fa5c 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,14 @@ module github.com/SovereignCloudStack/csctl-plugin-openstack -go 1.21 +go 1.22 + +toolchain go1.22.10 require ( - github.com/SovereignCloudStack/csctl v0.0.3 + github.com/SovereignCloudStack/csctl v0.0.4 github.com/goccy/go-yaml v1.12.0 - github.com/gophercloud/gophercloud v1.14.0 - github.com/minio/minio-go/v7 v7.0.76 + github.com/gophercloud/gophercloud v1.14.1 + github.com/minio/minio-go/v7 v7.0.83 github.com/spf13/cobra v1.8.1 ) @@ -15,22 +17,22 @@ require ( github.com/dustin/go-humanize v1.0.1 // indirect github.com/fatih/color v1.13.0 // indirect github.com/go-ini/ini v1.67.0 // indirect - github.com/goccy/go-json v0.10.3 // indirect + github.com/goccy/go-json v0.10.4 // indirect github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/klauspost/compress v1.17.9 // indirect - github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/rs/xid v1.6.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - golang.org/x/crypto v0.26.0 // indirect + golang.org/x/crypto v0.31.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/sys v0.24.0 // indirect - golang.org/x/text v0.17.0 // indirect + golang.org/x/net v0.33.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 435a47e..18970c4 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ github.com/SovereignCloudStack/cluster-stack-operator v0.1.0-alpha.5 h1:jONI0j2BVpL6ubQt9nT8LxWwtHQ1kyrvt1X/1+Hrea8= github.com/SovereignCloudStack/cluster-stack-operator v0.1.0-alpha.5/go.mod h1:zrwUudq/JQae24/yzS5exA1ZwaXxIL2ZtKIQVrYuPqY= -github.com/SovereignCloudStack/csctl v0.0.3 h1:OVR5pkoa2JBJWenvQM3KeboGIPPnFATgEPUrABfbmcU= -github.com/SovereignCloudStack/csctl v0.0.3/go.mod h1:RzJ62+gjKyWjAMvHD+xoou5ZDZ3KDHTHxzmAlmOBr7Y= +github.com/SovereignCloudStack/csctl v0.0.4 h1:G8Y5xj/su7ghQ4WPD2gzyQv3cYuUZTKAduY/0C2z6jM= +github.com/SovereignCloudStack/csctl v0.0.4/go.mod h1:WE4TuFZcPLgR/sAsqOhptCr60Uj88mig2xk9tKGe1W4= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -18,23 +18,23 @@ github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD87 github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE= github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= -github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= -github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/goccy/go-json v0.10.4 h1:JSwxQzIqKfmFX1swYPpUThQZp/Ka4wzJdK0LWVytLPM= +github.com/goccy/go-json v0.10.4/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/goccy/go-yaml v1.12.0 h1:/1WHjnMsI1dlIBQutrvSMGZRQufVO3asrHfTwfACoPM= github.com/goccy/go-yaml v1.12.0/go.mod h1:wKnAMd44+9JAAnGQpWVEgBzGt3YuTaQ4uXoHvE4m7WU= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gophercloud/gophercloud v1.14.0 h1:Bt9zQDhPrbd4qX7EILGmy+i7GP35cc+AAL2+wIJpUE8= -github.com/gophercloud/gophercloud v1.14.0/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= +github.com/gophercloud/gophercloud v1.14.1 h1:DTCNaTVGl8/cFu58O1JwWgis9gtISAFONqpMKNg/Vpw= +github.com/gophercloud/gophercloud v1.14.1/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= -github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= +github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -54,8 +54,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= -github.com/minio/minio-go/v7 v7.0.76 h1:9nxHH2XDai61cT/EFhyIw/wW4vJfpPNvl7lSFpRt+Ng= -github.com/minio/minio-go/v7 v7.0.76/go.mod h1:AVM3IUN6WwKzmwBxVdjzhH8xq+f57JSbbvzqvUzR6eg= +github.com/minio/minio-go/v7 v7.0.83 h1:W4Kokksvlz3OKf3OqIlzDNKd4MERlC2oN8YptwJ0+GA= +github.com/minio/minio-go/v7 v7.0.83/go.mod h1:57YXpvc5l3rjPdhqNrDsvVlY0qPI6UTk1bflAe+9doY= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -71,13 +71,13 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -85,14 +85,13 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= -golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/vendor/github.com/SovereignCloudStack/csctl/pkg/clusterstack/config.go b/vendor/github.com/SovereignCloudStack/csctl/pkg/clusterstack/config.go index 56cb078..4d991bb 100644 --- a/vendor/github.com/SovereignCloudStack/csctl/pkg/clusterstack/config.go +++ b/vendor/github.com/SovereignCloudStack/csctl/pkg/clusterstack/config.go @@ -131,3 +131,15 @@ func GetClusterStackReleaseDirectoryName(metadata *MetaData, config *CsctlConfig return clusterStackReleaseDirName, nil } + +// GetClusterStackReleaseName return the cluster stack release name. +// e.g. - docker-ferrol-1-27-v0-sha.uxumi7s . +func GetClusterStackReleaseName(metada *MetaData, config *CsctlConfig) (string, error) { + kubernetesVerion, err := config.ParseKubernetesVersion() + if err != nil { + return "", fmt.Errorf("failed to parse kubernetes version: %w", err) + } + + clusterStackReleaseName := fmt.Sprintf("%s-%s-%s-%s", config.Config.Provider.Type, config.Config.ClusterStackName, kubernetesVerion.String(), metada.Versions.ClusterStack) + return clusterStackReleaseName, nil +} diff --git a/vendor/github.com/SovereignCloudStack/csctl/pkg/hash/hash.go b/vendor/github.com/SovereignCloudStack/csctl/pkg/hash/hash.go index a5ba160..1b43269 100644 --- a/vendor/github.com/SovereignCloudStack/csctl/pkg/hash/hash.go +++ b/vendor/github.com/SovereignCloudStack/csctl/pkg/hash/hash.go @@ -20,6 +20,7 @@ package hash import ( "crypto/sha256" "encoding/base64" + "encoding/json" "fmt" "io" "os" @@ -43,6 +44,21 @@ type ReleaseHash struct { NodeImageDir string `json:"nodeImageDir,omitempty"` } +// ParseReleaseHash parses the cluster-stack release hash. +func ParseReleaseHash(path string) (ReleaseHash, error) { + latestGitHubReleaseHashData, err := os.ReadFile(filepath.Clean(path)) + if err != nil { + return ReleaseHash{}, fmt.Errorf("failed to read hash: %q: %w", path, err) + } + + var releaseHash ReleaseHash + if err := json.Unmarshal(latestGitHubReleaseHashData, &releaseHash); err != nil { + return ReleaseHash{}, fmt.Errorf("failed to unmarshal json: %q: %w", path, err) + } + + return releaseHash, nil +} + // GetHash returns the release hash. func GetHash(path string) (ReleaseHash, error) { entries, err := os.ReadDir(path) diff --git a/vendor/github.com/goccy/go-json/internal/decoder/compile.go b/vendor/github.com/goccy/go-json/internal/decoder/compile.go index fab6437..8ad5093 100644 --- a/vendor/github.com/goccy/go-json/internal/decoder/compile.go +++ b/vendor/github.com/goccy/go-json/internal/decoder/compile.go @@ -5,6 +5,7 @@ import ( "fmt" "reflect" "strings" + "sync" "sync/atomic" "unicode" "unsafe" @@ -17,22 +18,27 @@ var ( typeAddr *runtime.TypeAddr cachedDecoderMap unsafe.Pointer // map[uintptr]decoder cachedDecoder []Decoder + initOnce sync.Once ) -func init() { - typeAddr = runtime.AnalyzeTypeAddr() - if typeAddr == nil { - typeAddr = &runtime.TypeAddr{} - } - cachedDecoder = make([]Decoder, typeAddr.AddrRange>>typeAddr.AddrShift+1) +func initDecoder() { + initOnce.Do(func() { + typeAddr = runtime.AnalyzeTypeAddr() + if typeAddr == nil { + typeAddr = &runtime.TypeAddr{} + } + cachedDecoder = make([]Decoder, typeAddr.AddrRange>>typeAddr.AddrShift+1) + }) } func loadDecoderMap() map[uintptr]Decoder { + initDecoder() p := atomic.LoadPointer(&cachedDecoderMap) return *(*map[uintptr]Decoder)(unsafe.Pointer(&p)) } func storeDecoder(typ uintptr, dec Decoder, m map[uintptr]Decoder) { + initDecoder() newDecoderMap := make(map[uintptr]Decoder, len(m)+1) newDecoderMap[typ] = dec diff --git a/vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go b/vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go index eb7e2b1..025ca85 100644 --- a/vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go +++ b/vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go @@ -10,6 +10,7 @@ import ( ) func CompileToGetDecoder(typ *runtime.Type) (Decoder, error) { + initDecoder() typeptr := uintptr(unsafe.Pointer(typ)) if typeptr > typeAddr.MaxTypeAddr { return compileToGetDecoderSlowPath(typeptr, typ) diff --git a/vendor/github.com/goccy/go-json/internal/decoder/compile_race.go b/vendor/github.com/goccy/go-json/internal/decoder/compile_race.go index 49cdda4..023b817 100644 --- a/vendor/github.com/goccy/go-json/internal/decoder/compile_race.go +++ b/vendor/github.com/goccy/go-json/internal/decoder/compile_race.go @@ -13,6 +13,7 @@ import ( var decMu sync.RWMutex func CompileToGetDecoder(typ *runtime.Type) (Decoder, error) { + initDecoder() typeptr := uintptr(unsafe.Pointer(typ)) if typeptr > typeAddr.MaxTypeAddr { return compileToGetDecoderSlowPath(typeptr, typ) diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compiler.go b/vendor/github.com/goccy/go-json/internal/encoder/compiler.go index 37b7aa3..b107636 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/compiler.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/compiler.go @@ -5,6 +5,7 @@ import ( "encoding" "encoding/json" "reflect" + "sync" "sync/atomic" "unsafe" @@ -24,14 +25,17 @@ var ( cachedOpcodeSets []*OpcodeSet cachedOpcodeMap unsafe.Pointer // map[uintptr]*OpcodeSet typeAddr *runtime.TypeAddr + initEncoderOnce sync.Once ) -func init() { - typeAddr = runtime.AnalyzeTypeAddr() - if typeAddr == nil { - typeAddr = &runtime.TypeAddr{} - } - cachedOpcodeSets = make([]*OpcodeSet, typeAddr.AddrRange>>typeAddr.AddrShift+1) +func initEncoder() { + initEncoderOnce.Do(func() { + typeAddr = runtime.AnalyzeTypeAddr() + if typeAddr == nil { + typeAddr = &runtime.TypeAddr{} + } + cachedOpcodeSets = make([]*OpcodeSet, typeAddr.AddrRange>>typeAddr.AddrShift+1) + }) } func loadOpcodeMap() map[uintptr]*OpcodeSet { diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go b/vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go index 20c93cb..b6f45a4 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go @@ -4,6 +4,7 @@ package encoder func CompileToGetCodeSet(ctx *RuntimeContext, typeptr uintptr) (*OpcodeSet, error) { + initEncoder() if typeptr > typeAddr.MaxTypeAddr || typeptr < typeAddr.BaseTypeAddr { codeSet, err := compileToGetCodeSetSlowPath(typeptr) if err != nil { diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go b/vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go index 13ba23f..47b482f 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go @@ -10,6 +10,7 @@ import ( var setsMu sync.RWMutex func CompileToGetCodeSet(ctx *RuntimeContext, typeptr uintptr) (*OpcodeSet, error) { + initEncoder() if typeptr > typeAddr.MaxTypeAddr || typeptr < typeAddr.BaseTypeAddr { codeSet, err := compileToGetCodeSetSlowPath(typeptr) if err != nil { diff --git a/vendor/github.com/goccy/go-json/internal/encoder/encoder.go b/vendor/github.com/goccy/go-json/internal/encoder/encoder.go index 14eb6a0..b436f5b 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/encoder.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/encoder.go @@ -406,6 +406,11 @@ func AppendMarshalJSON(ctx *RuntimeContext, code *Opcode, b []byte, v interface{ rv = newV } } + + if rv.Kind() == reflect.Ptr && rv.IsNil() { + return AppendNull(ctx, b), nil + } + v = rv.Interface() var bb []byte if (code.Flags & MarshalerContextFlags) != 0 { diff --git a/vendor/github.com/gophercloud/gophercloud/CHANGELOG.md b/vendor/github.com/gophercloud/gophercloud/CHANGELOG.md index dc38bf5..b19b5e7 100644 --- a/vendor/github.com/gophercloud/gophercloud/CHANGELOG.md +++ b/vendor/github.com/gophercloud/gophercloud/CHANGELOG.md @@ -1,3 +1,7 @@ +## v1.14.1 (2024-09-18) + +* [GH-3162](https://github.com/gophercloud/gophercloud/pull/3162) Fix security group rule "any protocol" + ## v1.14.0 (2024-07-24) * [GH-3095](https://github.com/gophercloud/gophercloud/pull/3095) [neutron]: introduce Description argument for the portforwarding diff --git a/vendor/github.com/gophercloud/gophercloud/provider_client.go b/vendor/github.com/gophercloud/gophercloud/provider_client.go index 6e57f48..7e25882 100644 --- a/vendor/github.com/gophercloud/gophercloud/provider_client.go +++ b/vendor/github.com/gophercloud/gophercloud/provider_client.go @@ -14,7 +14,7 @@ import ( // DefaultUserAgent is the default User-Agent string set in the request header. const ( - DefaultUserAgent = "gophercloud/v1.14.0" + DefaultUserAgent = "gophercloud/v1.14.1" DefaultMaxBackoffRetries = 60 ) diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index 0c9088a..20b8022 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -9,6 +9,9 @@ import ( "encoding/binary" "math" "math/bits" + "sync" + + "github.com/klauspost/compress/internal/race" ) // Encode returns the encoded form of src. The returned slice may be a sub- @@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte { return dst[:d] } +var estblockPool [2]sync.Pool + // EstimateBlockSize will perform a very fast compression // without outputting the result and return the compressed output size. // The function returns -1 if no improvement could be achieved. @@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) { return -1 } if len(src) <= 1024 { - d = calcBlockSizeSmall(src) + const sz, pool = 2048, 0 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSizeSmall(src, tmp) } else { - d = calcBlockSize(src) + const sz, pool = 32768, 1 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSize(src, tmp) } if d == 0 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go index 4f45206..7aadd25 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -3,10 +3,16 @@ package s2 -import "github.com/klauspost/compress/internal/race" +import ( + "sync" + + "github.com/klauspost/compress/internal/race" +) const hasAmd64Asm = true +var encPools [4]sync.Pool + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) { ) if len(src) >= 4<<20 { - return encodeBlockAsm(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBlockAsm4MB(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm8B(dst, src, tmp) } +var encBetterPools [5]sync.Pool + // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) { ) if len(src) > 4<<20 { - return encodeBetterBlockAsm(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBetterBlockAsm4MB(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeSnappyBlockAsm64K(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBetterBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm(dst, src, tmp) } + if len(src) >= limit12B { - return encodeSnappyBetterBlockAsm64K(dst, src) + const sz, pool = 294912, 4 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm8B(dst, src, tmp) } diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 6b393c3..dd1c973 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int { } // input must be > inputMargin -func calcBlockSize(src []byte) (d int) { +func calcBlockSize(src []byte, _ *[32768]byte) (d int) { // Initialize the hash table. const ( tableBits = 13 @@ -503,7 +503,7 @@ emitRemainder: } // length must be > inputMargin. -func calcBlockSizeSmall(src []byte) (d int) { +func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) { // Initialize the hash table. const ( tableBits = 9 diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 297e415..f43aa81 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -11,154 +11,154 @@ func _dummy_() // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm(dst []byte, src []byte) int +func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm4MB(dst []byte, src []byte) int +func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm12B(dst []byte, src []byte) int +func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm10B(dst []byte, src []byte) int +func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm8B(dst []byte, src []byte) int +func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm(dst []byte, src []byte) int +func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm12B(dst []byte, src []byte) int +func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm10B(dst []byte, src []byte) int +func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm8B(dst []byte, src []byte) int +func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm(dst []byte, src []byte) int +func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSize(src []byte) int +func calcBlockSize(src []byte, tmp *[32768]byte) int // calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 1024 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSizeSmall(src []byte) int +func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // emitLiteral writes a literal chunk and returns the number of bytes written. // diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 2ff5b33..df9be68 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0 #endif RET -// func encodeBlockAsm(dst []byte, src []byte) int +// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm JMP memmove_long_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm memmove_long_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm matchlen_bsf_16repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match8_repeat_extend_encodeBlockAsm: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match4_repeat_extend_encodeBlockAsm: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm JB repeat_extend_forward_end_encodeBlockAsm - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match1_repeat_extend_encodeBlockAsm: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_match_repeat_encodeBlockAsm: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm cant_repeat_two_offset_match_repeat_encodeBlockAsm: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_match_repeat_encodeBlockAsm - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_repeat_encodeBlockAsm repeat_five_match_repeat_encodeBlockAsm: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_match_repeat_encodeBlockAsm: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_match_repeat_encodeBlockAsm: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_match_repeat_encodeBlockAsm: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_match_repeat_encodeBlockAsm: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_as_copy_encodeBlockAsm: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm four_bytes_remain_repeat_as_copy_encodeBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_repeat_as_copy_encodeBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - MOVL SI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + MOVL DI, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm long_offset_short_repeat_as_copy_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_short_repeat_as_copy_encodeBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm emit_copy_three_repeat_as_copy_encodeBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm no_repeat_found_encodeBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm candidate3_match_encodeBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm candidate2_match_encodeBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm match_extend_back_loop_encodeBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm JMP match_extend_back_loop_encodeBlockAsm match_extend_back_end_encodeBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBlockAsm four_bytes_match_emit_encodeBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm three_bytes_match_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm two_bytes_match_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm JMP memmove_long_match_emit_encodeBlockAsm one_byte_match_emit_encodeBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm memmove_long_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm: match_nolit_loop_encodeBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm matchlen_bsf_16match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match8_match_nolit_encodeBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match4_match_nolit_encodeBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm JB match_nolit_end_encodeBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm matchlen_match1_match_nolit_encodeBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy repeat_five_match_nolit_encodeBlockAsm_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm four_bytes_remain_match_nolit_encodeBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_match_nolit_encodeBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - MOVL BX, DI - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + MOVL SI, R8 + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm long_offset_short_match_nolit_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm - INCL CX + INCL DX JMP search_loop_encodeBlockAsm emit_remainder_encodeBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm @@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm: JB three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBlockAsm four_bytes_emit_remainder_encodeBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm three_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm two_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm JMP memmove_long_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm memmove_long_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -1416,1199 +1417,1200 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm4MB(dst []byte, src []byte) int +// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm4MB(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm4MB - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm4MB - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 4(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 4(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm4MB: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match8_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm4MB JB repeat_extend_forward_end_encodeBlockAsm4MB - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match1_repeat_extend_encodeBlockAsm4MB: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm4MB: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm4MB - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm4MB - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm4MB - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm4MB - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB long_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm4MB: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm4MB - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm4MB - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm4MB: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm4MB - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB matchlen_bsf_16match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match8_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm4MB JB match_nolit_end_encodeBlockAsm4MB - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm4MB matchlen_match1_match_nolit_encodeBlockAsm4MB: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm4MB - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm4MB: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm4MB - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm4MB - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 - + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 + // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB long_offset_short_match_nolit_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm4MB - INCL CX + INCL DX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm4MB @@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB: JB three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm12B(dst []byte, src []byte) int +// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm12B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm12B JB three_bytes_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B matchlen_bsf_16repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match8_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm12B matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm12B JB repeat_extend_forward_end_encodeBlockAsm12B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match1_repeat_extend_encodeBlockAsm12B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm12B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm12B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm12B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B long_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm12B JB three_bytes_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B matchlen_bsf_16match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match8_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm12B matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm12B JB match_nolit_end_encodeBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm12B matchlen_match1_match_nolit_encodeBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm12B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B long_offset_short_match_nolit_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBlockAsm12B + two_byte_offset_short_match_nolit_encodeBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm12B @@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B: JB three_bytes_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm10B(dst []byte, src []byte) int +// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm10B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm10B JB three_bytes_repeat_emit_encodeBlockAsm10B three_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm10B JMP memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B memmove_long_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B matchlen_bsf_16repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match8_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm10B matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm10B JB repeat_extend_forward_end_encodeBlockAsm10B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match1_repeat_extend_encodeBlockAsm10B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm10B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm10B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm10B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm10B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B long_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm10B match_extend_back_loop_encodeBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm10B JB three_bytes_match_emit_encodeBlockAsm10B three_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B matchlen_bsf_16match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match8_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm10B matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm10B JB match_nolit_end_encodeBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm10B matchlen_match1_match_nolit_encodeBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm10B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B long_offset_short_match_nolit_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm10B @@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B: JB three_bytes_emit_remainder_encodeBlockAsm10B three_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm10B memmove_long_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm8B(dst []byte, src []byte) int +// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 - JNE no_repeat_found_encodeBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 + JNE no_repeat_found_encodeBlockAsm8B + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm8B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm8B JB three_bytes_repeat_emit_encodeBlockAsm8B three_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B matchlen_bsf_16repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match8_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm8B matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm8B JB repeat_extend_forward_end_encodeBlockAsm8B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match1_repeat_extend_encodeBlockAsm8B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm8B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm8B - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm8B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_match_repeat_encodeBlockAsm8B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B long_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm8B candidate3_match_encodeBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm8B JB three_bytes_match_emit_encodeBlockAsm8B three_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) - ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B matchlen_bsf_16match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match8_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm8B matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm8B JB match_nolit_end_encodeBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm8B matchlen_match1_match_nolit_encodeBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm8B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B long_offset_short_match_nolit_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm8B emit_remainder_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm8B @@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B: JB three_bytes_emit_remainder_encodeBlockAsm8B three_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm8B two_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm8B memmove_long_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm(dst []byte, src []byte) int +// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm no_short_found_encodeBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm matchlen_bsf_16match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match8_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm JB match_nolit_end_encodeBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm matchlen_match1_match_nolit_encodeBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - MOVL DI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + MOVL R8, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm long_offset_short_match_nolit_encodeBetterBlockAsm: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_repeat_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - CMPQ AX, (SP) - JB match_nolit_dst_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_nolit_dst_ok_encodeBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + CMPQ CX, (SP) + JB match_nolit_dst_ok_encodeBetterBlockAsm + MOVQ $0x00000000, ret+56(FP) + RET + +match_nolit_dst_ok_encodeBetterBlockAsm: + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm @@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm: JB three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm three_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm memmove_long_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm4MB - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm4MB check_maxskip_ok_encodeBetterBlockAsm4MB: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm4MB: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm4MB - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm4MB no_short_found_encodeBetterBlockAsm4MB: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm4MB - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm4MB match_extend_back_loop_encodeBetterBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm4MB JMP match_extend_back_loop_encodeBetterBlockAsm4MB match_extend_back_end_encodeBetterBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm4MB: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB JB match_nolit_end_encodeBetterBlockAsm4MB - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm4MB: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm4MB - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm4MB match_length_ok_encodeBetterBlockAsm4MB: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB three_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB two_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_encodeBetterBlockAsm4MB one_byte_match_emit_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB memmove_long_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB long_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB match_is_repeat_encodeBetterBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm4MB: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm4MB: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm4MB @@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB: JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB three_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB two_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm4MB JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB one_byte_emit_remainder_encodeBetterBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB memmove_long_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm12B no_short_found_encodeBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match8_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B JB match_nolit_end_encodeBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm12B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm12B JB three_bytes_match_emit_encodeBetterBlockAsm12B three_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B long_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B three_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm12B @@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeBetterBlockAsm12B three_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B memmove_long_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 - JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm10B no_short_found_encodeBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match8_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B JB match_nolit_end_encodeBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm10B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm10B JB three_bytes_match_emit_encodeBetterBlockAsm10B three_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B long_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B one_byte_match_emit_repeat_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm10B @@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeBetterBlockAsm10B three_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B two_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B memmove_long_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm8B no_short_found_encodeBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match8_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B JB match_nolit_end_encodeBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm8B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm8B JB three_bytes_match_emit_encodeBetterBlockAsm8B three_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B memmove_long_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B long_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B emit_copy_three_match_nolit_encodeBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B match_is_repeat_encodeBetterBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm8B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B one_byte_match_emit_repeat_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x04 + CMPQ R8, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ DI, $0x08 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R8), R9 - MOVL R9, (AX) + MOVL (R9), R10 + MOVL R10, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R8), R9 - MOVL -4(R8)(DI*1), R8 - MOVL R9, (AX) - MOVL R8, -4(AX)(DI*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (CX) + MOVL R9, -4(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R12 - SUBQ R9, R12 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R11 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R12*1), R9 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R10 ADDQ $0x20, R13 - ADDQ $0x20, R9 - ADDQ $0x20, R12 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R12*1), X4 - MOVOU -16(R8)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ DI, R12 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm8B @@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeBetterBlockAsm8B three_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B two_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B memmove_long_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVL BX, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL SI, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match8_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm JB repeat_extend_forward_end_encodeSnappyBlockAsm - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeSnappyBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm memmove_long_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match8_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match4_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm JB match_nolit_end_encodeSnappyBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match1_match_nolit_encodeSnappyBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm @@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm memmove_long_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm64K - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm64K: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K three_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K JB repeat_extend_forward_end_encodeSnappyBlockAsm64K - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm64K: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm64K emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm64K: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm64K no_repeat_found_encodeSnappyBlockAsm64K: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm64K - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm64K - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm64K candidate3_match_encodeSnappyBlockAsm64K: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm64K candidate2_match_encodeSnappyBlockAsm64K: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm64K match_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm64K JMP match_extend_back_loop_encodeSnappyBlockAsm64K match_extend_back_end_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm64K: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm64K - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm64K JB three_bytes_match_emit_encodeSnappyBlockAsm64K three_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm64K two_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBlockAsm64K one_byte_match_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K memmove_long_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm64K: match_nolit_loop_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K JB match_nolit_end_encodeSnappyBlockAsm64K - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm64K - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm64K: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm64K - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm64K emit_remainder_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm64K @@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K three_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K two_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K -emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) +emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K memmove_long_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B three_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B JB repeat_extend_forward_end_encodeSnappyBlockAsm12B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm12B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm12B JB three_bytes_match_emit_encodeSnappyBlockAsm12B three_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B JB match_nolit_end_encodeSnappyBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm12B @@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B three_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B memmove_long_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B three_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B JB repeat_extend_forward_end_encodeSnappyBlockAsm10B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm10B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm10B JB three_bytes_match_emit_encodeSnappyBlockAsm10B three_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B memmove_long_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: match_nolit_loop_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B JB match_nolit_end_encodeSnappyBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm10B @@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B three_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B memmove_long_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B three_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B JB repeat_extend_forward_end_encodeSnappyBlockAsm8B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm8B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm8B JB three_bytes_match_emit_encodeSnappyBlockAsm8B three_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B JB match_nolit_end_encodeSnappyBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm8B @@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B three_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeSnappyBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeSnappyBetterBlockAsm check_maxskip_ok_encodeSnappyBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeSnappyBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm no_short_found_encodeSnappyBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm match_extend_back_loop_encodeSnappyBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm JMP match_extend_back_loop_encodeSnappyBetterBlockAsm match_extend_back_end_encodeSnappyBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm JB match_nolit_end_encodeSnappyBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeSnappyBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeSnappyBetterBlockAsm match_length_ok_encodeSnappyBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm four_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm three_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm two_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm one_byte_match_emit_encodeSnappyBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm memmove_long_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm @@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm one_byte_emit_remainder_encodeSnappyBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm memmove_long_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000900, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 262168(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 262168(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL 262144(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 262144(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm64K - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm64K no_short_found_encodeSnappyBetterBlockAsm64K: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K match_extend_back_loop_encodeSnappyBetterBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K match_extend_back_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K JB match_nolit_end_encodeSnappyBetterBlockAsm64K - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K one_byte_match_emit_encodeSnappyBetterBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + +emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x30, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 262168(SP)(R10*4) - MOVL R13, 262168(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x33, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x33, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 262144(AX)(R11*4) + MOVL R14, 262144(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x30, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x30, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K @@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm12B no_short_found_encodeSnappyBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B JB match_nolit_end_encodeSnappyBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B @@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) - JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B - -emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) + JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B + +emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -16775,463 +16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm10B no_short_found_encodeSnappyBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B match_extend_back_loop_encodeSnappyBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B match_extend_back_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B JB match_nolit_end_encodeSnappyBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B one_byte_match_emit_encodeSnappyBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B @@ -17240,26 +17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -17355,461 +17374,462 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm8B no_short_found_encodeSnappyBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B match_extend_back_loop_encodeSnappyBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B match_extend_back_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 - JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 + JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B JB match_nolit_end_encodeSnappyBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B three_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B one_byte_match_emit_encodeSnappyBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B @@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_ JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func calcBlockSize(src []byte) int +// func calcBlockSize(src []byte, tmp *[32768]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSize(SB), $32792-32 - XORQ AX, AX - MOVQ $0x00000100, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSize(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000100, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSize: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSize MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSize: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x33, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x33, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSize - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSize repeat_extend_back_loop_calcBlockSize: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSize - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSize - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSize repeat_extend_back_end_calcBlockSize: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSize: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSize - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSize - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_repeat_emit_calcBlockSize four_bytes_repeat_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_repeat_emit_calcBlockSize three_bytes_repeat_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSize two_bytes_repeat_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSize JMP memmove_long_repeat_emit_calcBlockSize one_byte_repeat_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSize memmove_long_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSize: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSize: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSize - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSize matchlen_bsf_16repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match8_repeat_extend_calcBlockSize: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSize matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match4_repeat_extend_calcBlockSize: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSize - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSize: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSize JB repeat_extend_forward_end_calcBlockSize - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSize - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSize matchlen_match1_repeat_extend_calcBlockSize: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSize - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSize: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_calcBlockSize four_bytes_loop_back_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_calcBlockSize - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_calcBlockSize JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize four_bytes_remain_repeat_as_copy_calcBlockSize: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP repeat_end_emit_calcBlockSize two_byte_offset_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSize - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSize two_byte_offset_short_repeat_as_copy_calcBlockSize: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSize - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSize emit_copy_three_repeat_as_copy_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSize: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSize no_repeat_found_calcBlockSize: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSize - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSize - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSize - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSize candidate3_match_calcBlockSize: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSize candidate2_match_calcBlockSize: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSize: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSize match_extend_back_loop_calcBlockSize: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSize - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSize - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSize JMP match_extend_back_loop_calcBlockSize match_extend_back_end_calcBlockSize: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSize: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSize - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSize - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSize - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB three_bytes_match_emit_calcBlockSize - CMPL SI, $0x01000000 + CMPL DI, $0x01000000 JB four_bytes_match_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_match_emit_calcBlockSize four_bytes_match_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_match_emit_calcBlockSize three_bytes_match_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSize two_bytes_match_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSize JMP memmove_long_match_emit_calcBlockSize one_byte_match_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSize memmove_long_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSize: match_nolit_loop_calcBlockSize: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSize: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSize - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSize matchlen_bsf_16match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match8_match_nolit_calcBlockSize: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSize matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match4_match_nolit_calcBlockSize: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSize - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSize: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSize JB match_nolit_end_calcBlockSize - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSize - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSize matchlen_match1_match_nolit_calcBlockSize: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSize - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSize: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_calcBlockSize four_bytes_loop_back_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_calcBlockSize - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_calcBlockSize JMP four_bytes_loop_back_match_nolit_calcBlockSize four_bytes_remain_match_nolit_calcBlockSize: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_calcBlockSize two_byte_offset_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSize - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSize two_byte_offset_short_match_nolit_calcBlockSize: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSize - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSize emit_copy_three_match_nolit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSize: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSize: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x33, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x33, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x33, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x33, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSize - INCL CX + INCL DX JMP search_loop_calcBlockSize emit_remainder_calcBlockSize: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSize: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSize - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSize - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x00010000 + CMPL AX, $0x00010000 JB three_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x01000000 + CMPL AX, $0x01000000 JB four_bytes_emit_remainder_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_emit_remainder_calcBlockSize four_bytes_emit_remainder_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_emit_remainder_calcBlockSize three_bytes_emit_remainder_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSize two_bytes_emit_remainder_calcBlockSize: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSize JMP memmove_long_emit_remainder_calcBlockSize one_byte_emit_remainder_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSize memmove_long_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSize: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET -// func calcBlockSizeSmall(src []byte) int +// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSizeSmall(SB), $2072-32 - XORQ AX, AX - MOVQ $0x00000010, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSizeSmall(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000010, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSizeSmall: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSizeSmall MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSizeSmall: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x37, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x37, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x37, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSizeSmall - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSizeSmall repeat_extend_back_loop_calcBlockSizeSmall: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSizeSmall - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSizeSmall repeat_extend_back_end_calcBlockSizeSmall: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSizeSmall: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSizeSmall - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSizeSmall JB three_bytes_repeat_emit_calcBlockSizeSmall three_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSizeSmall two_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSizeSmall JMP memmove_long_repeat_emit_calcBlockSizeSmall one_byte_repeat_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSizeSmall memmove_long_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSizeSmall: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall matchlen_bsf_16repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match8_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSizeSmall matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match4_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSizeSmall - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSizeSmall JB repeat_extend_forward_end_calcBlockSizeSmall - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSizeSmall - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match1_repeat_extend_calcBlockSizeSmall: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSizeSmall - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSizeSmall: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_calcBlockSizeSmall: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall: - MOVL BX, SI - SHLL $0x02, SI - CMPL BX, $0x0c + MOVL SI, DI + SHLL $0x02, DI + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSizeSmall emit_copy_three_repeat_as_copy_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSizeSmall: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSizeSmall no_repeat_found_calcBlockSizeSmall: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSizeSmall - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSizeSmall - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSizeSmall - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSizeSmall candidate3_match_calcBlockSizeSmall: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSizeSmall candidate2_match_calcBlockSizeSmall: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSizeSmall: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSizeSmall match_extend_back_loop_calcBlockSizeSmall: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSizeSmall - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSizeSmall JMP match_extend_back_loop_calcBlockSizeSmall match_extend_back_end_calcBlockSizeSmall: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSizeSmall: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSizeSmall - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSizeSmall - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSizeSmall JB three_bytes_match_emit_calcBlockSizeSmall three_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSizeSmall two_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSizeSmall JMP memmove_long_match_emit_calcBlockSizeSmall one_byte_match_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSizeSmall memmove_long_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSizeSmall: match_nolit_loop_calcBlockSizeSmall: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall matchlen_bsf_16match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match8_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSizeSmall matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match4_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSizeSmall - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSizeSmall JB match_nolit_end_calcBlockSizeSmall - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSizeSmall - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSizeSmall matchlen_match1_match_nolit_calcBlockSizeSmall: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSizeSmall - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSizeSmall: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_calcBlockSizeSmall: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSizeSmall two_byte_offset_short_match_nolit_calcBlockSizeSmall: - MOVL R9, BX - SHLL $0x02, BX - CMPL R9, $0x0c + MOVL R10, SI + SHLL $0x02, SI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSizeSmall emit_copy_three_match_nolit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSizeSmall: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSizeSmall: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x37, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x37, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x37, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x37, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSizeSmall - INCL CX + INCL DX JMP search_loop_calcBlockSizeSmall emit_remainder_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSizeSmall - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSizeSmall JB three_bytes_emit_remainder_calcBlockSizeSmall three_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSizeSmall two_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSizeSmall JMP memmove_long_emit_remainder_calcBlockSizeSmall one_byte_emit_remainder_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSizeSmall memmove_long_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSizeSmall: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET // func emitLiteral(dst []byte, lit []byte) int @@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4_s2_loop: @@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4s_s2_loop: @@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4_snappy_loop: CMPQ DX, BX @@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4s_snappy_loop: CMPQ DX, BX diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go index 0a46f2b..fd15078 100644 --- a/vendor/github.com/klauspost/compress/s2/writer.go +++ b/vendor/github.com/klauspost/compress/s2/writer.go @@ -83,11 +83,14 @@ type Writer struct { snappy bool flushOnWrite bool appendIndex bool + bufferCB func([]byte) level uint8 } type result struct { b []byte + // return when writing + ret []byte // Uncompressed start offset startOffset int64 } @@ -146,6 +149,10 @@ func (w *Writer) Reset(writer io.Writer) { for write := range toWrite { // Wait for the data to be available. input := <-write + if input.ret != nil && w.bufferCB != nil { + w.bufferCB(input.ret) + input.ret = nil + } in := input.b if len(in) > 0 { if w.err(nil) == nil { @@ -341,7 +348,8 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { // but the input buffer cannot be written to by the caller // until Flush or Close has been called when concurrency != 1. // -// If you cannot control that, use the regular Write function. +// Use the WriterBufferDone to receive a callback when the buffer is done +// Processing. // // Note that input is not buffered. // This means that each write will result in discrete blocks being created. @@ -364,6 +372,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { } if w.concurrency == 1 { _, err := w.writeSync(buf) + if w.bufferCB != nil { + w.bufferCB(buf) + } return err } @@ -378,7 +389,7 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } - + orgBuf := buf for len(buf) > 0 { // Cut input. uncompressed := buf @@ -397,6 +408,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { startOffset: w.uncompWritten, } w.uncompWritten += int64(len(uncompressed)) + if len(buf) == 0 && w.bufferCB != nil { + res.ret = orgBuf + } go func() { race.ReadSlice(uncompressed) @@ -922,7 +936,7 @@ func WriterBetterCompression() WriterOption { } // WriterBestCompression will enable better compression. -// EncodeBetter compresses better than Encode but typically with a +// EncodeBest compresses better than Encode but typically with a // big speed decrease on compression. func WriterBestCompression() WriterOption { return func(w *Writer) error { @@ -941,6 +955,17 @@ func WriterUncompressed() WriterOption { } } +// WriterBufferDone will perform a callback when EncodeBuffer has finished +// writing a buffer to the output and the buffer can safely be reused. +// If the buffer was split into several blocks, it will be sent after the last block. +// Callbacks will not be done concurrently. +func WriterBufferDone(fn func(b []byte)) WriterOption { + return func(w *Writer) error { + w.bufferCB = fn + return nil + } +} + // WriterBlockSize allows to override the default block size. // Blocks will be this size or smaller. // Minimum size is 4KB and maximum size is 4MB. diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md index 21508ed..f06ba51 100644 --- a/vendor/github.com/klauspost/cpuid/v2/README.md +++ b/vendor/github.com/klauspost/cpuid/v2/README.md @@ -281,6 +281,7 @@ Exit Code 1 | AMXBF16 | Tile computational operations on BFLOAT16 numbers | | AMXINT8 | Tile computational operations on 8-bit integers | | AMXFP16 | Tile computational operations on FP16 numbers | +| AMXFP8 | Tile computational operations on FP8 numbers | | AMXTILE | Tile architecture | | APX_F | Intel APX | | AVX | AVX functions | diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go index 53bc18c..db99eb6 100644 --- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -55,6 +55,12 @@ const ( Qualcomm Marvell + QEMU + QNX + ACRN + SRE + Apple + lastVendor ) @@ -75,6 +81,7 @@ const ( AMXBF16 // Tile computational operations on BFLOAT16 numbers AMXFP16 // Tile computational operations on FP16 numbers AMXINT8 // Tile computational operations on 8-bit integers + AMXFP8 // Tile computational operations on FP8 numbers AMXTILE // Tile architecture APX_F // Intel APX AVX // AVX functions @@ -296,20 +303,22 @@ const ( // CPUInfo contains information about the detected system CPU. type CPUInfo struct { - BrandName string // Brand name reported by the CPU - VendorID Vendor // Comparable CPU vendor ID - VendorString string // Raw vendor string. - featureSet flagSet // Features of the CPU - PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. - ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. - LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. - Family int // CPU family number - Model int // CPU model number - Stepping int // CPU stepping info - CacheLine int // Cache line size in bytes. Will be 0 if undetectable. - Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. - BoostFreq int64 // Max clock speed, if known, 0 otherwise - Cache struct { + BrandName string // Brand name reported by the CPU + VendorID Vendor // Comparable CPU vendor ID + VendorString string // Raw vendor string. + HypervisorVendorID Vendor // Hypervisor vendor + HypervisorVendorString string // Raw hypervisor vendor string + featureSet flagSet // Features of the CPU + PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. + ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. + LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. + Family int // CPU family number + Model int // CPU model number + Stepping int // CPU stepping info + CacheLine int // Cache line size in bytes. Will be 0 if undetectable. + Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. + BoostFreq int64 // Max clock speed, if known, 0 otherwise + Cache struct { L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected L2 int // L2 Cache (per core or shared). Will be -1 if undetected @@ -318,8 +327,9 @@ type CPUInfo struct { SGX SGXSupport AMDMemEncryption AMDMemEncryptionSupport AVX10Level uint8 - maxFunc uint32 - maxExFunc uint32 + + maxFunc uint32 + maxExFunc uint32 } var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) @@ -503,7 +513,7 @@ func (c CPUInfo) FeatureSet() []string { // Uses the RDTSCP instruction. The value 0 is returned // if the CPU does not support the instruction. func (c CPUInfo) RTCounter() uint64 { - if !c.Supports(RDTSCP) { + if !c.Has(RDTSCP) { return 0 } a, _, _, d := rdtscpAsm() @@ -515,13 +525,22 @@ func (c CPUInfo) RTCounter() uint64 { // about the current cpu/core the code is running on. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. func (c CPUInfo) Ia32TscAux() uint32 { - if !c.Supports(RDTSCP) { + if !c.Has(RDTSCP) { return 0 } _, _, ecx, _ := rdtscpAsm() return ecx } +// SveLengths returns arm SVE vector and predicate lengths. +// Will return 0, 0 if SVE is not enabled or otherwise unable to detect. +func (c CPUInfo) SveLengths() (vl, pl uint64) { + if !c.Has(SVE) { + return 0, 0 + } + return getVectorLength() +} + // LogicalCPU will return the Logical CPU the code is currently executing on. // This is likely to change when the OS re-schedules the running thread // to another CPU. @@ -781,11 +800,16 @@ func threadsPerCore() int { _, b, _, _ := cpuidex(0xb, 0) if b&0xffff == 0 { if vend == AMD { - // Workaround for AMD returning 0, assume 2 if >= Zen 2 - // It will be more correct than not. + // if >= Zen 2 0x8000001e EBX 15-8 bits means threads per core. + // The number of threads per core is ThreadsPerCore+1 + // See PPR for AMD Family 17h Models 00h-0Fh (page 82) fam, _, _ := familyModel() _, _, _, d := cpuid(1) if (d&(1<<28)) != 0 && fam >= 23 { + if maxExtendedFunction() >= 0x8000001e { + _, b, _, _ := cpuid(0x8000001e) + return int((b>>8)&0xff) + 1 + } return 2 } } @@ -877,7 +901,9 @@ var vendorMapping = map[string]Vendor{ "GenuineTMx86": Transmeta, "Geode by NSC": NSC, "VIA VIA VIA ": VIA, - "KVMKVMKVMKVM": KVM, + "KVMKVMKVM": KVM, + "Linux KVM Hv": KVM, + "TCGTCGTCGTCG": QEMU, "Microsoft Hv": MSVM, "VMwareVMware": VMware, "XenVMMXenVMM": XenHVM, @@ -887,6 +913,10 @@ var vendorMapping = map[string]Vendor{ "SiS SiS SiS ": SiS, "RiseRiseRise": SiS, "Genuine RDC": RDC, + "QNXQVMBSQG": QNX, + "ACRNACRNACRN": ACRN, + "SRESRESRESRE": SRE, + "Apple VZ": Apple, } func vendorID() (Vendor, string) { @@ -899,6 +929,17 @@ func vendorID() (Vendor, string) { return vend, v } +func hypervisorVendorID() (Vendor, string) { + // https://lwn.net/Articles/301888/ + _, b, c, d := cpuid(0x40000000) + v := string(valAsString(b, c, d)) + vend, ok := vendorMapping[v] + if !ok { + return VendorUnknown, v + } + return vend, v +} + func cacheLine() int { if maxFunctionID() < 0x1 { return 0 @@ -1271,6 +1312,7 @@ func support() flagSet { fs.setIf(ebx&(1<<31) != 0, AVX512VL) // ecx fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) + fs.setIf(ecx&(1<<3) != 0, AMXFP8) fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s index b31d6ae..b196f78 100644 --- a/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s @@ -24,3 +24,13 @@ TEXT ·getInstAttributes(SB), 7, $0 MOVD R1, instAttrReg1+8(FP) RET +TEXT ·getVectorLength(SB), 7, $0 + WORD $0xd2800002 // mov x2, #0 + WORD $0x04225022 // addvl x2, x2, #1 + WORD $0xd37df042 // lsl x2, x2, #3 + WORD $0xd2800003 // mov x3, #0 + WORD $0x04635023 // addpl x3, x3, #1 + WORD $0xd37df063 // lsl x3, x3, #3 + MOVD R2, vl+0(FP) + MOVD R3, pl+8(FP) + RET diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go index 9a53504..566743d 100644 --- a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go @@ -10,6 +10,7 @@ import "runtime" func getMidr() (midr uint64) func getProcFeatures() (procFeatures uint64) func getInstAttributes() (instAttrReg0, instAttrReg1 uint64) +func getVectorLength() (vl, pl uint64) func initCPU() { cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } @@ -24,7 +25,7 @@ func addInfo(c *CPUInfo, safe bool) { detectOS(c) // ARM64 disabled since it may crash if interrupt is not intercepted by OS. - if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" { + if safe && !c.Has(ARMCPUID) && runtime.GOOS != "freebsd" { return } midr := getMidr() diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go index 9636c2b..574f938 100644 --- a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go @@ -10,6 +10,8 @@ func initCPU() { cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } xgetbv = func(uint32) (a, b uint32) { return 0, 0 } rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } + } func addInfo(info *CPUInfo, safe bool) {} +func getVectorLength() (vl, pl uint64) { return 0, 0 } diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go index 799b400..f924c9d 100644 --- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go @@ -32,7 +32,10 @@ func addInfo(c *CPUInfo, safe bool) { c.LogicalCores = logicalCores() c.PhysicalCores = physicalCores() c.VendorID, c.VendorString = vendorID() + c.HypervisorVendorID, c.HypervisorVendorString = hypervisorVendorID() c.AVX10Level = c.supportAVX10() c.cacheSize() c.frequencies() } + +func getVectorLength() (vl, pl uint64) { return 0, 0 } diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go index 3a25603..e7f874a 100644 --- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go +++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go @@ -15,224 +15,225 @@ func _() { _ = x[AMXBF16-5] _ = x[AMXFP16-6] _ = x[AMXINT8-7] - _ = x[AMXTILE-8] - _ = x[APX_F-9] - _ = x[AVX-10] - _ = x[AVX10-11] - _ = x[AVX10_128-12] - _ = x[AVX10_256-13] - _ = x[AVX10_512-14] - _ = x[AVX2-15] - _ = x[AVX512BF16-16] - _ = x[AVX512BITALG-17] - _ = x[AVX512BW-18] - _ = x[AVX512CD-19] - _ = x[AVX512DQ-20] - _ = x[AVX512ER-21] - _ = x[AVX512F-22] - _ = x[AVX512FP16-23] - _ = x[AVX512IFMA-24] - _ = x[AVX512PF-25] - _ = x[AVX512VBMI-26] - _ = x[AVX512VBMI2-27] - _ = x[AVX512VL-28] - _ = x[AVX512VNNI-29] - _ = x[AVX512VP2INTERSECT-30] - _ = x[AVX512VPOPCNTDQ-31] - _ = x[AVXIFMA-32] - _ = x[AVXNECONVERT-33] - _ = x[AVXSLOW-34] - _ = x[AVXVNNI-35] - _ = x[AVXVNNIINT8-36] - _ = x[AVXVNNIINT16-37] - _ = x[BHI_CTRL-38] - _ = x[BMI1-39] - _ = x[BMI2-40] - _ = x[CETIBT-41] - _ = x[CETSS-42] - _ = x[CLDEMOTE-43] - _ = x[CLMUL-44] - _ = x[CLZERO-45] - _ = x[CMOV-46] - _ = x[CMPCCXADD-47] - _ = x[CMPSB_SCADBS_SHORT-48] - _ = x[CMPXCHG8-49] - _ = x[CPBOOST-50] - _ = x[CPPC-51] - _ = x[CX16-52] - _ = x[EFER_LMSLE_UNS-53] - _ = x[ENQCMD-54] - _ = x[ERMS-55] - _ = x[F16C-56] - _ = x[FLUSH_L1D-57] - _ = x[FMA3-58] - _ = x[FMA4-59] - _ = x[FP128-60] - _ = x[FP256-61] - _ = x[FSRM-62] - _ = x[FXSR-63] - _ = x[FXSROPT-64] - _ = x[GFNI-65] - _ = x[HLE-66] - _ = x[HRESET-67] - _ = x[HTT-68] - _ = x[HWA-69] - _ = x[HYBRID_CPU-70] - _ = x[HYPERVISOR-71] - _ = x[IA32_ARCH_CAP-72] - _ = x[IA32_CORE_CAP-73] - _ = x[IBPB-74] - _ = x[IBPB_BRTYPE-75] - _ = x[IBRS-76] - _ = x[IBRS_PREFERRED-77] - _ = x[IBRS_PROVIDES_SMP-78] - _ = x[IBS-79] - _ = x[IBSBRNTRGT-80] - _ = x[IBSFETCHSAM-81] - _ = x[IBSFFV-82] - _ = x[IBSOPCNT-83] - _ = x[IBSOPCNTEXT-84] - _ = x[IBSOPSAM-85] - _ = x[IBSRDWROPCNT-86] - _ = x[IBSRIPINVALIDCHK-87] - _ = x[IBS_FETCH_CTLX-88] - _ = x[IBS_OPDATA4-89] - _ = x[IBS_OPFUSE-90] - _ = x[IBS_PREVENTHOST-91] - _ = x[IBS_ZEN4-92] - _ = x[IDPRED_CTRL-93] - _ = x[INT_WBINVD-94] - _ = x[INVLPGB-95] - _ = x[KEYLOCKER-96] - _ = x[KEYLOCKERW-97] - _ = x[LAHF-98] - _ = x[LAM-99] - _ = x[LBRVIRT-100] - _ = x[LZCNT-101] - _ = x[MCAOVERFLOW-102] - _ = x[MCDT_NO-103] - _ = x[MCOMMIT-104] - _ = x[MD_CLEAR-105] - _ = x[MMX-106] - _ = x[MMXEXT-107] - _ = x[MOVBE-108] - _ = x[MOVDIR64B-109] - _ = x[MOVDIRI-110] - _ = x[MOVSB_ZL-111] - _ = x[MOVU-112] - _ = x[MPX-113] - _ = x[MSRIRC-114] - _ = x[MSRLIST-115] - _ = x[MSR_PAGEFLUSH-116] - _ = x[NRIPS-117] - _ = x[NX-118] - _ = x[OSXSAVE-119] - _ = x[PCONFIG-120] - _ = x[POPCNT-121] - _ = x[PPIN-122] - _ = x[PREFETCHI-123] - _ = x[PSFD-124] - _ = x[RDPRU-125] - _ = x[RDRAND-126] - _ = x[RDSEED-127] - _ = x[RDTSCP-128] - _ = x[RRSBA_CTRL-129] - _ = x[RTM-130] - _ = x[RTM_ALWAYS_ABORT-131] - _ = x[SBPB-132] - _ = x[SERIALIZE-133] - _ = x[SEV-134] - _ = x[SEV_64BIT-135] - _ = x[SEV_ALTERNATIVE-136] - _ = x[SEV_DEBUGSWAP-137] - _ = x[SEV_ES-138] - _ = x[SEV_RESTRICTED-139] - _ = x[SEV_SNP-140] - _ = x[SGX-141] - _ = x[SGXLC-142] - _ = x[SHA-143] - _ = x[SME-144] - _ = x[SME_COHERENT-145] - _ = x[SPEC_CTRL_SSBD-146] - _ = x[SRBDS_CTRL-147] - _ = x[SRSO_MSR_FIX-148] - _ = x[SRSO_NO-149] - _ = x[SRSO_USER_KERNEL_NO-150] - _ = x[SSE-151] - _ = x[SSE2-152] - _ = x[SSE3-153] - _ = x[SSE4-154] - _ = x[SSE42-155] - _ = x[SSE4A-156] - _ = x[SSSE3-157] - _ = x[STIBP-158] - _ = x[STIBP_ALWAYSON-159] - _ = x[STOSB_SHORT-160] - _ = x[SUCCOR-161] - _ = x[SVM-162] - _ = x[SVMDA-163] - _ = x[SVMFBASID-164] - _ = x[SVML-165] - _ = x[SVMNP-166] - _ = x[SVMPF-167] - _ = x[SVMPFT-168] - _ = x[SYSCALL-169] - _ = x[SYSEE-170] - _ = x[TBM-171] - _ = x[TDX_GUEST-172] - _ = x[TLB_FLUSH_NESTED-173] - _ = x[TME-174] - _ = x[TOPEXT-175] - _ = x[TSCRATEMSR-176] - _ = x[TSXLDTRK-177] - _ = x[VAES-178] - _ = x[VMCBCLEAN-179] - _ = x[VMPL-180] - _ = x[VMSA_REGPROT-181] - _ = x[VMX-182] - _ = x[VPCLMULQDQ-183] - _ = x[VTE-184] - _ = x[WAITPKG-185] - _ = x[WBNOINVD-186] - _ = x[WRMSRNS-187] - _ = x[X87-188] - _ = x[XGETBV1-189] - _ = x[XOP-190] - _ = x[XSAVE-191] - _ = x[XSAVEC-192] - _ = x[XSAVEOPT-193] - _ = x[XSAVES-194] - _ = x[AESARM-195] - _ = x[ARMCPUID-196] - _ = x[ASIMD-197] - _ = x[ASIMDDP-198] - _ = x[ASIMDHP-199] - _ = x[ASIMDRDM-200] - _ = x[ATOMICS-201] - _ = x[CRC32-202] - _ = x[DCPOP-203] - _ = x[EVTSTRM-204] - _ = x[FCMA-205] - _ = x[FP-206] - _ = x[FPHP-207] - _ = x[GPA-208] - _ = x[JSCVT-209] - _ = x[LRCPC-210] - _ = x[PMULL-211] - _ = x[SHA1-212] - _ = x[SHA2-213] - _ = x[SHA3-214] - _ = x[SHA512-215] - _ = x[SM3-216] - _ = x[SM4-217] - _ = x[SVE-218] - _ = x[lastID-219] + _ = x[AMXFP8-8] + _ = x[AMXTILE-9] + _ = x[APX_F-10] + _ = x[AVX-11] + _ = x[AVX10-12] + _ = x[AVX10_128-13] + _ = x[AVX10_256-14] + _ = x[AVX10_512-15] + _ = x[AVX2-16] + _ = x[AVX512BF16-17] + _ = x[AVX512BITALG-18] + _ = x[AVX512BW-19] + _ = x[AVX512CD-20] + _ = x[AVX512DQ-21] + _ = x[AVX512ER-22] + _ = x[AVX512F-23] + _ = x[AVX512FP16-24] + _ = x[AVX512IFMA-25] + _ = x[AVX512PF-26] + _ = x[AVX512VBMI-27] + _ = x[AVX512VBMI2-28] + _ = x[AVX512VL-29] + _ = x[AVX512VNNI-30] + _ = x[AVX512VP2INTERSECT-31] + _ = x[AVX512VPOPCNTDQ-32] + _ = x[AVXIFMA-33] + _ = x[AVXNECONVERT-34] + _ = x[AVXSLOW-35] + _ = x[AVXVNNI-36] + _ = x[AVXVNNIINT8-37] + _ = x[AVXVNNIINT16-38] + _ = x[BHI_CTRL-39] + _ = x[BMI1-40] + _ = x[BMI2-41] + _ = x[CETIBT-42] + _ = x[CETSS-43] + _ = x[CLDEMOTE-44] + _ = x[CLMUL-45] + _ = x[CLZERO-46] + _ = x[CMOV-47] + _ = x[CMPCCXADD-48] + _ = x[CMPSB_SCADBS_SHORT-49] + _ = x[CMPXCHG8-50] + _ = x[CPBOOST-51] + _ = x[CPPC-52] + _ = x[CX16-53] + _ = x[EFER_LMSLE_UNS-54] + _ = x[ENQCMD-55] + _ = x[ERMS-56] + _ = x[F16C-57] + _ = x[FLUSH_L1D-58] + _ = x[FMA3-59] + _ = x[FMA4-60] + _ = x[FP128-61] + _ = x[FP256-62] + _ = x[FSRM-63] + _ = x[FXSR-64] + _ = x[FXSROPT-65] + _ = x[GFNI-66] + _ = x[HLE-67] + _ = x[HRESET-68] + _ = x[HTT-69] + _ = x[HWA-70] + _ = x[HYBRID_CPU-71] + _ = x[HYPERVISOR-72] + _ = x[IA32_ARCH_CAP-73] + _ = x[IA32_CORE_CAP-74] + _ = x[IBPB-75] + _ = x[IBPB_BRTYPE-76] + _ = x[IBRS-77] + _ = x[IBRS_PREFERRED-78] + _ = x[IBRS_PROVIDES_SMP-79] + _ = x[IBS-80] + _ = x[IBSBRNTRGT-81] + _ = x[IBSFETCHSAM-82] + _ = x[IBSFFV-83] + _ = x[IBSOPCNT-84] + _ = x[IBSOPCNTEXT-85] + _ = x[IBSOPSAM-86] + _ = x[IBSRDWROPCNT-87] + _ = x[IBSRIPINVALIDCHK-88] + _ = x[IBS_FETCH_CTLX-89] + _ = x[IBS_OPDATA4-90] + _ = x[IBS_OPFUSE-91] + _ = x[IBS_PREVENTHOST-92] + _ = x[IBS_ZEN4-93] + _ = x[IDPRED_CTRL-94] + _ = x[INT_WBINVD-95] + _ = x[INVLPGB-96] + _ = x[KEYLOCKER-97] + _ = x[KEYLOCKERW-98] + _ = x[LAHF-99] + _ = x[LAM-100] + _ = x[LBRVIRT-101] + _ = x[LZCNT-102] + _ = x[MCAOVERFLOW-103] + _ = x[MCDT_NO-104] + _ = x[MCOMMIT-105] + _ = x[MD_CLEAR-106] + _ = x[MMX-107] + _ = x[MMXEXT-108] + _ = x[MOVBE-109] + _ = x[MOVDIR64B-110] + _ = x[MOVDIRI-111] + _ = x[MOVSB_ZL-112] + _ = x[MOVU-113] + _ = x[MPX-114] + _ = x[MSRIRC-115] + _ = x[MSRLIST-116] + _ = x[MSR_PAGEFLUSH-117] + _ = x[NRIPS-118] + _ = x[NX-119] + _ = x[OSXSAVE-120] + _ = x[PCONFIG-121] + _ = x[POPCNT-122] + _ = x[PPIN-123] + _ = x[PREFETCHI-124] + _ = x[PSFD-125] + _ = x[RDPRU-126] + _ = x[RDRAND-127] + _ = x[RDSEED-128] + _ = x[RDTSCP-129] + _ = x[RRSBA_CTRL-130] + _ = x[RTM-131] + _ = x[RTM_ALWAYS_ABORT-132] + _ = x[SBPB-133] + _ = x[SERIALIZE-134] + _ = x[SEV-135] + _ = x[SEV_64BIT-136] + _ = x[SEV_ALTERNATIVE-137] + _ = x[SEV_DEBUGSWAP-138] + _ = x[SEV_ES-139] + _ = x[SEV_RESTRICTED-140] + _ = x[SEV_SNP-141] + _ = x[SGX-142] + _ = x[SGXLC-143] + _ = x[SHA-144] + _ = x[SME-145] + _ = x[SME_COHERENT-146] + _ = x[SPEC_CTRL_SSBD-147] + _ = x[SRBDS_CTRL-148] + _ = x[SRSO_MSR_FIX-149] + _ = x[SRSO_NO-150] + _ = x[SRSO_USER_KERNEL_NO-151] + _ = x[SSE-152] + _ = x[SSE2-153] + _ = x[SSE3-154] + _ = x[SSE4-155] + _ = x[SSE42-156] + _ = x[SSE4A-157] + _ = x[SSSE3-158] + _ = x[STIBP-159] + _ = x[STIBP_ALWAYSON-160] + _ = x[STOSB_SHORT-161] + _ = x[SUCCOR-162] + _ = x[SVM-163] + _ = x[SVMDA-164] + _ = x[SVMFBASID-165] + _ = x[SVML-166] + _ = x[SVMNP-167] + _ = x[SVMPF-168] + _ = x[SVMPFT-169] + _ = x[SYSCALL-170] + _ = x[SYSEE-171] + _ = x[TBM-172] + _ = x[TDX_GUEST-173] + _ = x[TLB_FLUSH_NESTED-174] + _ = x[TME-175] + _ = x[TOPEXT-176] + _ = x[TSCRATEMSR-177] + _ = x[TSXLDTRK-178] + _ = x[VAES-179] + _ = x[VMCBCLEAN-180] + _ = x[VMPL-181] + _ = x[VMSA_REGPROT-182] + _ = x[VMX-183] + _ = x[VPCLMULQDQ-184] + _ = x[VTE-185] + _ = x[WAITPKG-186] + _ = x[WBNOINVD-187] + _ = x[WRMSRNS-188] + _ = x[X87-189] + _ = x[XGETBV1-190] + _ = x[XOP-191] + _ = x[XSAVE-192] + _ = x[XSAVEC-193] + _ = x[XSAVEOPT-194] + _ = x[XSAVES-195] + _ = x[AESARM-196] + _ = x[ARMCPUID-197] + _ = x[ASIMD-198] + _ = x[ASIMDDP-199] + _ = x[ASIMDHP-200] + _ = x[ASIMDRDM-201] + _ = x[ATOMICS-202] + _ = x[CRC32-203] + _ = x[DCPOP-204] + _ = x[EVTSTRM-205] + _ = x[FCMA-206] + _ = x[FP-207] + _ = x[FPHP-208] + _ = x[GPA-209] + _ = x[JSCVT-210] + _ = x[LRCPC-211] + _ = x[PMULL-212] + _ = x[SHA1-213] + _ = x[SHA2-214] + _ = x[SHA3-215] + _ = x[SHA512-216] + _ = x[SM3-217] + _ = x[SM4-218] + _ = x[SVE-219] + _ = x[lastID-220] _ = x[firstID-0] } -const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8AVXVNNIINT16BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXFP8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8AVXVNNIINT16BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" -var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 323, 331, 335, 339, 345, 350, 358, 363, 369, 373, 382, 400, 408, 415, 419, 423, 437, 443, 447, 451, 460, 464, 468, 473, 478, 482, 486, 493, 497, 500, 506, 509, 512, 522, 532, 545, 558, 562, 573, 577, 591, 608, 611, 621, 632, 638, 646, 657, 665, 677, 693, 707, 718, 728, 743, 751, 762, 772, 779, 788, 798, 802, 805, 812, 817, 828, 835, 842, 850, 853, 859, 864, 873, 880, 888, 892, 895, 901, 908, 921, 926, 928, 935, 942, 948, 952, 961, 965, 970, 976, 982, 988, 998, 1001, 1017, 1021, 1030, 1033, 1042, 1057, 1070, 1076, 1090, 1097, 1100, 1105, 1108, 1111, 1123, 1137, 1147, 1159, 1166, 1185, 1188, 1192, 1196, 1200, 1205, 1210, 1215, 1220, 1234, 1245, 1251, 1254, 1259, 1268, 1272, 1277, 1282, 1288, 1295, 1300, 1303, 1312, 1328, 1331, 1337, 1347, 1355, 1359, 1368, 1372, 1384, 1387, 1397, 1400, 1407, 1415, 1422, 1425, 1432, 1435, 1440, 1446, 1454, 1460, 1466, 1474, 1479, 1486, 1493, 1501, 1508, 1513, 1518, 1525, 1529, 1531, 1535, 1538, 1543, 1548, 1553, 1557, 1561, 1565, 1571, 1574, 1577, 1580, 1586} +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 61, 68, 73, 76, 81, 90, 99, 108, 112, 122, 134, 142, 150, 158, 166, 173, 183, 193, 201, 211, 222, 230, 240, 258, 273, 280, 292, 299, 306, 317, 329, 337, 341, 345, 351, 356, 364, 369, 375, 379, 388, 406, 414, 421, 425, 429, 443, 449, 453, 457, 466, 470, 474, 479, 484, 488, 492, 499, 503, 506, 512, 515, 518, 528, 538, 551, 564, 568, 579, 583, 597, 614, 617, 627, 638, 644, 652, 663, 671, 683, 699, 713, 724, 734, 749, 757, 768, 778, 785, 794, 804, 808, 811, 818, 823, 834, 841, 848, 856, 859, 865, 870, 879, 886, 894, 898, 901, 907, 914, 927, 932, 934, 941, 948, 954, 958, 967, 971, 976, 982, 988, 994, 1004, 1007, 1023, 1027, 1036, 1039, 1048, 1063, 1076, 1082, 1096, 1103, 1106, 1111, 1114, 1117, 1129, 1143, 1153, 1165, 1172, 1191, 1194, 1198, 1202, 1206, 1211, 1216, 1221, 1226, 1240, 1251, 1257, 1260, 1265, 1274, 1278, 1283, 1288, 1294, 1301, 1306, 1309, 1318, 1334, 1337, 1343, 1353, 1361, 1365, 1374, 1378, 1390, 1393, 1403, 1406, 1413, 1421, 1428, 1431, 1438, 1441, 1446, 1452, 1460, 1466, 1472, 1480, 1485, 1492, 1499, 1507, 1514, 1519, 1524, 1531, 1535, 1537, 1541, 1544, 1549, 1554, 1559, 1563, 1567, 1571, 1577, 1580, 1583, 1586, 1592} func (i FeatureID) String() string { if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { @@ -270,12 +271,17 @@ func _() { _ = x[AMCC-23] _ = x[Qualcomm-24] _ = x[Marvell-25] - _ = x[lastVendor-26] + _ = x[QEMU-26] + _ = x[QNX-27] + _ = x[ACRN-28] + _ = x[SRE-29] + _ = x[Apple-30] + _ = x[lastVendor-31] } -const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor" +const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvellQEMUQNXACRNSREApplelastVendor" -var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155} +var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 149, 152, 156, 159, 164, 174} func (i Vendor) String() string { if i < 0 || i >= Vendor(len(_Vendor_index)-1) { diff --git a/vendor/github.com/minio/minio-go/v7/Makefile b/vendor/github.com/minio/minio-go/v7/Makefile index 68444aa..9e4ddc4 100644 --- a/vendor/github.com/minio/minio-go/v7/Makefile +++ b/vendor/github.com/minio/minio-go/v7/Makefile @@ -32,6 +32,10 @@ functional-test: @GO111MODULE=on go build -race functional_tests.go @SERVER_ENDPOINT=localhost:9000 ACCESS_KEY=minioadmin SECRET_KEY=minioadmin ENABLE_HTTPS=1 MINT_MODE=full ./functional_tests +functional-test-notls: + @GO111MODULE=on go build -race functional_tests.go + @SERVER_ENDPOINT=localhost:9000 ACCESS_KEY=minioadmin SECRET_KEY=minioadmin ENABLE_HTTPS=0 MINT_MODE=full ./functional_tests + clean: @echo "Cleaning up all the generated files" @find . -name '*.test' | xargs rm -fv diff --git a/vendor/github.com/minio/minio-go/v7/README.md b/vendor/github.com/minio/minio-go/v7/README.md index 82f70a1..be7963c 100644 --- a/vendor/github.com/minio/minio-go/v7/README.md +++ b/vendor/github.com/minio/minio-go/v7/README.md @@ -253,7 +253,7 @@ The full API Reference is available here. * [setbucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/setbucketencryption.go) * [getbucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/getbucketencryption.go) -* [deletebucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/deletebucketencryption.go) +* [removebucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/removebucketencryption.go) ### Full Examples : Bucket replication Operations diff --git a/vendor/github.com/minio/minio-go/v7/api-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-datatypes.go index 97a6f80..8a8fd88 100644 --- a/vendor/github.com/minio/minio-go/v7/api-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-datatypes.go @@ -143,10 +143,11 @@ type UploadInfo struct { // Verified checksum values, if any. // Values are base64 (standard) encoded. // For multipart objects this is a checksum of the checksum of each part. - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string } // RestoreInfo contains information of the restore operation of an archived object @@ -215,10 +216,11 @@ type ObjectInfo struct { Restore *RestoreInfo // Checksum values - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string Internal *struct { K int // Data blocks diff --git a/vendor/github.com/minio/minio-go/v7/api-get-object.go b/vendor/github.com/minio/minio-go/v7/api-get-object.go index d7fd278..5cc85f6 100644 --- a/vendor/github.com/minio/minio-go/v7/api-get-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-get-object.go @@ -318,7 +318,7 @@ func (o *Object) doGetRequest(request getRequest) (getResponse, error) { response := <-o.resCh // Return any error to the top level. - if response.Error != nil { + if response.Error != nil && response.Error != io.EOF { return response, response.Error } @@ -340,7 +340,7 @@ func (o *Object) doGetRequest(request getRequest) (getResponse, error) { // Data are ready on the wire, no need to reinitiate connection in lower level o.seekData = false - return response, nil + return response, response.Error } // setOffset - handles the setting of offsets for diff --git a/vendor/github.com/minio/minio-go/v7/api-presigned.go b/vendor/github.com/minio/minio-go/v7/api-presigned.go index 9e85f81..2964220 100644 --- a/vendor/github.com/minio/minio-go/v7/api-presigned.go +++ b/vendor/github.com/minio/minio-go/v7/api-presigned.go @@ -140,7 +140,7 @@ func (c *Client) PresignedPostPolicy(ctx context.Context, p *PostPolicy) (u *url } // Get credentials from the configured credentials provider. - credValues, err := c.credsProvider.Get() + credValues, err := c.credsProvider.GetWithContext(c.CredContext()) if err != nil { return nil, nil, err } diff --git a/vendor/github.com/minio/minio-go/v7/api-prompt-object.go b/vendor/github.com/minio/minio-go/v7/api-prompt-object.go new file mode 100644 index 0000000..dac062a --- /dev/null +++ b/vendor/github.com/minio/minio-go/v7/api-prompt-object.go @@ -0,0 +1,78 @@ +/* + * MinIO Go Library for Amazon S3 Compatible Cloud Storage + * Copyright 2015-2024 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package minio + +import ( + "bytes" + "context" + "io" + "net/http" + + "github.com/goccy/go-json" + "github.com/minio/minio-go/v7/pkg/s3utils" +) + +// PromptObject performs language model inference with the prompt and referenced object as context. +// Inference is performed using a Lambda handler that can process the prompt and object. +// Currently, this functionality is limited to certain MinIO servers. +func (c *Client) PromptObject(ctx context.Context, bucketName, objectName, prompt string, opts PromptObjectOptions) (io.ReadCloser, error) { + // Input validation. + if err := s3utils.CheckValidBucketName(bucketName); err != nil { + return nil, ErrorResponse{ + StatusCode: http.StatusBadRequest, + Code: "InvalidBucketName", + Message: err.Error(), + } + } + if err := s3utils.CheckValidObjectName(objectName); err != nil { + return nil, ErrorResponse{ + StatusCode: http.StatusBadRequest, + Code: "XMinioInvalidObjectName", + Message: err.Error(), + } + } + + opts.AddLambdaArnToReqParams(opts.LambdaArn) + opts.SetHeader("Content-Type", "application/json") + opts.AddPromptArg("prompt", prompt) + promptReqBytes, err := json.Marshal(opts.PromptArgs) + if err != nil { + return nil, err + } + + // Execute POST on bucket/object. + resp, err := c.executeMethod(ctx, http.MethodPost, requestMetadata{ + bucketName: bucketName, + objectName: objectName, + queryValues: opts.toQueryValues(), + customHeader: opts.Header(), + contentSHA256Hex: sum256Hex(promptReqBytes), + contentBody: bytes.NewReader(promptReqBytes), + contentLength: int64(len(promptReqBytes)), + }) + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusOK { + defer closeResponse(resp) + return nil, httpRespToErrorResponse(resp, bucketName, objectName) + } + + return resp.Body, nil +} diff --git a/vendor/github.com/minio/minio-go/v7/api-prompt-options.go b/vendor/github.com/minio/minio-go/v7/api-prompt-options.go new file mode 100644 index 0000000..4493a75 --- /dev/null +++ b/vendor/github.com/minio/minio-go/v7/api-prompt-options.go @@ -0,0 +1,84 @@ +/* + * MinIO Go Library for Amazon S3 Compatible Cloud Storage + * Copyright 2015-2024 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package minio + +import ( + "net/http" + "net/url" +) + +// PromptObjectOptions provides options to PromptObject call. +// LambdaArn is the ARN of the Prompt Lambda to be invoked. +// PromptArgs is a map of key-value pairs to be passed to the inference action on the Prompt Lambda. +// "prompt" is a reserved key and should not be used as a key in PromptArgs. +type PromptObjectOptions struct { + LambdaArn string + PromptArgs map[string]any + headers map[string]string + reqParams url.Values +} + +// Header returns the http.Header representation of the POST options. +func (o PromptObjectOptions) Header() http.Header { + headers := make(http.Header, len(o.headers)) + for k, v := range o.headers { + headers.Set(k, v) + } + return headers +} + +// AddPromptArg Add a key value pair to the prompt arguments where the key is a string and +// the value is a JSON serializable. +func (o *PromptObjectOptions) AddPromptArg(key string, value any) { + if o.PromptArgs == nil { + o.PromptArgs = make(map[string]any) + } + o.PromptArgs[key] = value +} + +// AddLambdaArnToReqParams adds the lambdaArn to the request query string parameters. +func (o *PromptObjectOptions) AddLambdaArnToReqParams(lambdaArn string) { + if o.reqParams == nil { + o.reqParams = make(url.Values) + } + o.reqParams.Add("lambdaArn", lambdaArn) +} + +// SetHeader adds a key value pair to the options. The +// key-value pair will be part of the HTTP POST request +// headers. +func (o *PromptObjectOptions) SetHeader(key, value string) { + if o.headers == nil { + o.headers = make(map[string]string) + } + o.headers[http.CanonicalHeaderKey(key)] = value +} + +// toQueryValues - Convert the reqParams in Options to query string parameters. +func (o *PromptObjectOptions) toQueryValues() url.Values { + urlValues := make(url.Values) + if o.reqParams != nil { + for key, values := range o.reqParams { + for _, value := range values { + urlValues.Add(key, value) + } + } + } + + return urlValues +} diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go b/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go index 0ae9142..3023b94 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go @@ -85,7 +85,10 @@ func (c *Client) PutObjectFanOut(ctx context.Context, bucket string, fanOutData policy.SetEncryption(fanOutReq.SSE) // Set checksum headers if any. - policy.SetChecksum(fanOutReq.Checksum) + err := policy.SetChecksum(fanOutReq.Checksum) + if err != nil { + return nil, err + } url, formData, err := c.PresignedPostPolicy(ctx, policy) if err != nil { diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go index a70cbea..03bd34f 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go @@ -83,10 +83,7 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // HTTPS connection. hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256) if len(hashSums) == 0 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. @@ -113,7 +110,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() for partNumber <= totalPartsCount { @@ -154,7 +150,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } p := uploadPartParams{bucketName: bucketName, objectName: objectName, uploadID: uploadID, reader: rd, partNumber: partNumber, md5Base64: md5Base64, sha256Hex: sha256Hex, size: int64(length), sse: opts.ServerSideEncryption, streamSha256: !opts.DisableContentSha256, customHeader: customHeader} @@ -182,18 +177,21 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -203,12 +201,8 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -354,10 +348,11 @@ func (c *Client) uploadPart(ctx context.Context, p uploadPartParams) (ObjectPart // Once successfully uploaded, return completed part. h := resp.Header objPart := ObjectPart{ - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), } objPart.Size = p.size objPart.PartNumber = p.partNumber @@ -457,9 +452,10 @@ func (c *Client) completeMultipartUpload(ctx context.Context, bucketName, object Expiration: expTime, ExpirationRuleID: ruleID, - ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256, - ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1, - ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32, - ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C, + ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256, + ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1, + ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32, + ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C, + ChecksumCRC64NVME: completeMultipartUploadResult.ChecksumCRC64NVME, }, nil } diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go index 7f31656..3ff3b69 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go @@ -52,7 +52,7 @@ func (c *Client) putObjectMultipartStream(ctx context.Context, bucketName, objec } else { info, err = c.putObjectMultipartStreamOptionalChecksum(ctx, bucketName, objectName, reader, size, opts) } - if err != nil { + if err != nil && s3utils.IsGoogleEndpoint(*c.endpointURL) { errResp := ToErrorResponse(err) // Verify if multipart functionality is not available, if not // fall back to single PutObject operation. @@ -108,13 +108,12 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN if err != nil { return UploadInfo{}, err } - + if opts.Checksum.IsSet() { + opts.AutoChecksum = opts.Checksum + } withChecksum := c.trailingHeaderSupport if withChecksum { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts) @@ -238,6 +237,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN // Gather the responses as they occur and update any // progress bar. + allParts := make([]ObjectPart, 0, totalPartsCount) for u := 1; u <= totalPartsCount; u++ { select { case <-ctx.Done(): @@ -246,16 +246,17 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN if uploadRes.Error != nil { return UploadInfo{}, uploadRes.Error } - + allParts = append(allParts, uploadRes.Part) // Update the totalUploadedSize. totalUploadedSize += uploadRes.Size complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: uploadRes.Part.ETag, - PartNumber: uploadRes.Part.PartNumber, - ChecksumCRC32: uploadRes.Part.ChecksumCRC32, - ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C, - ChecksumSHA1: uploadRes.Part.ChecksumSHA1, - ChecksumSHA256: uploadRes.Part.ChecksumSHA256, + ETag: uploadRes.Part.ETag, + PartNumber: uploadRes.Part.PartNumber, + ChecksumCRC32: uploadRes.Part.ChecksumCRC32, + ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C, + ChecksumSHA1: uploadRes.Part.ChecksumSHA1, + ChecksumSHA256: uploadRes.Part.ChecksumSHA256, + ChecksumCRC64NVME: uploadRes.Part.ChecksumCRC64NVME, }) } } @@ -273,15 +274,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN AutoChecksum: opts.AutoChecksum, } if withChecksum { - // Add hash of hashes. - crc := opts.AutoChecksum.Hasher() - for _, part := range complMultipartUpload.Parts { - cs, err := base64.StdEncoding.DecodeString(part.Checksum(opts.AutoChecksum)) - if err == nil { - crc.Write(cs) - } - } - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} + applyAutoChecksum(&opts, allParts) } uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) @@ -304,11 +297,13 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b return UploadInfo{}, err } + if opts.Checksum.IsSet() { + opts.AutoChecksum = opts.Checksum + opts.SendContentMd5 = false + } + if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Calculate the optimal parts info for a given size. @@ -335,7 +330,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() md5Hash := c.md5Hasher() @@ -382,7 +376,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.KeyCapitalized(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } // Update progress reader appropriately to the latest offset @@ -413,18 +406,21 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -435,12 +431,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -463,12 +454,12 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam if err = s3utils.CheckValidObjectName(objectName); err != nil { return UploadInfo{}, err } - + if opts.Checksum.IsSet() { + opts.SendContentMd5 = false + opts.AutoChecksum = opts.Checksum + } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Cancel all when an error occurs. @@ -500,7 +491,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte crc := opts.AutoChecksum.Hasher() // Total data read and written to server. should be equal to 'size' at the end of the call. @@ -555,12 +545,11 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Calculate md5sum. customHeader := make(http.Header) if !opts.SendContentMd5 { - // Add CRC32C instead. + // Add Checksum instead. crc.Reset() crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } wg.Add(1) @@ -620,18 +609,21 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -642,12 +634,8 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -677,6 +665,9 @@ func (c *Client) putObject(ctx context.Context, bucketName, objectName string, r if opts.SendContentMd5 && s3utils.IsGoogleEndpoint(*c.endpointURL) && size < 0 { return UploadInfo{}, errInvalidArgument("MD5Sum cannot be calculated with size '-1'") } + if opts.Checksum.IsSet() { + opts.SendContentMd5 = false + } var readSeeker io.Seeker if size > 0 { @@ -746,17 +737,6 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, // Set headers. customHeader := opts.Header() - // Add CRC when client supports it, MD5 is not set, not Google and we don't add SHA256 to chunks. - addCrc := c.trailingHeaderSupport && md5Base64 == "" && !s3utils.IsGoogleEndpoint(*c.endpointURL) && (opts.DisableContentSha256 || c.secure) - - if addCrc { - // If user has added checksums, don't add them ourselves. - for k := range opts.UserMetadata { - if strings.HasPrefix(strings.ToLower(k), "x-amz-checksum-") { - addCrc = false - } - } - } // Populate request metadata. reqMetadata := requestMetadata{ bucketName: bucketName, @@ -768,10 +748,23 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, contentSHA256Hex: sha256Hex, streamSha256: !opts.DisableContentSha256, } - if addCrc { - opts.AutoChecksum.SetDefault(ChecksumCRC32C) - reqMetadata.addCrc = &opts.AutoChecksum + // Add CRC when client supports it, MD5 is not set, not Google and we don't add SHA256 to chunks. + addCrc := c.trailingHeaderSupport && md5Base64 == "" && !s3utils.IsGoogleEndpoint(*c.endpointURL) && (opts.DisableContentSha256 || c.secure) + if opts.Checksum.IsSet() { + reqMetadata.addCrc = &opts.Checksum + } else if addCrc { + // If user has added checksums, don't add them ourselves. + for k := range opts.UserMetadata { + if strings.HasPrefix(strings.ToLower(k), "x-amz-checksum-") { + addCrc = false + } + } + if addCrc { + opts.AutoChecksum.SetDefault(ChecksumCRC32C) + reqMetadata.addCrc = &opts.AutoChecksum + } } + if opts.Internal.SourceVersionID != "" { if opts.Internal.SourceVersionID != nullVersionID { if _, err := uuid.Parse(opts.Internal.SourceVersionID); err != nil { @@ -808,9 +801,10 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, ExpirationRuleID: ruleID, // Checksum values - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), }, nil } diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object.go b/vendor/github.com/minio/minio-go/v7/api-put-object.go index a792cfe..0981757 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object.go @@ -45,6 +45,8 @@ const ( ReplicationStatusFailed ReplicationStatus = "FAILED" // ReplicationStatusReplica indicates object is a replica of a source ReplicationStatusReplica ReplicationStatus = "REPLICA" + // ReplicationStatusReplicaEdge indicates object is a replica of a edge source + ReplicationStatusReplicaEdge ReplicationStatus = "REPLICA-EDGE" ) // Empty returns true if no replication status set. @@ -94,6 +96,13 @@ type PutObjectOptions struct { // If none is specified CRC32C is used, since it is generally the fastest. AutoChecksum ChecksumType + // Checksum will force a checksum of the specific type. + // This requires that the client was created with "TrailingHeaders:true" option, + // and that the destination server supports it. + // Unavailable with V2 signatures & Google endpoints. + // This will disable content MD5 checksums if set. + Checksum ChecksumType + // ConcurrentStreamParts will create NumThreads buffers of PartSize bytes, // fill them serially and upload them in parallel. // This can be used for faster uploads on non-seekable or slow-to-seek input. @@ -240,7 +249,7 @@ func (opts PutObjectOptions) Header() (header http.Header) { } // validate() checks if the UserMetadata map has standard headers or and raises an error if so. -func (opts PutObjectOptions) validate() (err error) { +func (opts PutObjectOptions) validate(c *Client) (err error) { for k, v := range opts.UserMetadata { if !httpguts.ValidHeaderFieldName(k) || isStandardHeader(k) || isSSEHeader(k) || isStorageClassHeader(k) || isMinioHeader(k) { return errInvalidArgument(k + " unsupported user defined metadata name") @@ -255,6 +264,17 @@ func (opts PutObjectOptions) validate() (err error) { if opts.LegalHold != "" && !opts.LegalHold.IsValid() { return errInvalidArgument(opts.LegalHold.String() + " unsupported legal-hold status") } + if opts.Checksum.IsSet() { + switch { + case !c.trailingHeaderSupport: + return errInvalidArgument("Checksum requires Client with TrailingHeaders enabled") + case c.overrideSignerType.IsV2(): + return errInvalidArgument("Checksum cannot be used with v2 signatures") + case s3utils.IsGoogleEndpoint(*c.endpointURL): + return errInvalidArgument("Checksum cannot be used with GCS endpoints") + } + } + return nil } @@ -291,7 +311,7 @@ func (c *Client) PutObject(ctx context.Context, bucketName, objectName string, r return UploadInfo{}, errors.New("object size must be provided with disable multipart upload") } - err = opts.validate() + err = opts.validate(c) if err != nil { return UploadInfo{}, err } @@ -333,7 +353,7 @@ func (c *Client) putObjectCommon(ctx context.Context, bucketName, objectName str return c.putObjectMultipartStreamNoLength(ctx, bucketName, objectName, reader, opts) } - if size < int64(partSize) || opts.DisableMultipart { + if size <= int64(partSize) || opts.DisableMultipart { return c.putObject(ctx, bucketName, objectName, reader, size, opts) } @@ -362,11 +382,12 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam return UploadInfo{}, err } + if opts.Checksum.IsSet() { + opts.SendContentMd5 = false + opts.AutoChecksum = opts.Checksum + } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. @@ -393,7 +414,6 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() @@ -419,7 +439,6 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } // Update progress reader appropriately to the latest offset @@ -451,18 +470,21 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -473,12 +495,8 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err diff --git a/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go b/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go index eb4da41..6b6559b 100644 --- a/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go +++ b/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go @@ -107,7 +107,7 @@ type readSeekCloser interface { // Total size should be < 5TB. // This function blocks until 'objs' is closed and the content has been uploaded. func (c Client) PutObjectsSnowball(ctx context.Context, bucketName string, opts SnowballOptions, objs <-chan SnowballObject) (err error) { - err = opts.Opts.validate() + err = opts.Opts.validate(&c) if err != nil { return err } diff --git a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go index 790606c..5e015fb 100644 --- a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go @@ -18,6 +18,7 @@ package minio import ( + "encoding/base64" "encoding/xml" "errors" "io" @@ -276,10 +277,45 @@ type ObjectPart struct { Size int64 // Checksum values of each part. - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string +} + +// Checksum will return the checksum for the given type. +// Will return the empty string if not set. +func (c ObjectPart) Checksum(t ChecksumType) string { + switch { + case t.Is(ChecksumCRC32C): + return c.ChecksumCRC32C + case t.Is(ChecksumCRC32): + return c.ChecksumCRC32 + case t.Is(ChecksumSHA1): + return c.ChecksumSHA1 + case t.Is(ChecksumSHA256): + return c.ChecksumSHA256 + case t.Is(ChecksumCRC64NVME): + return c.ChecksumCRC64NVME + } + return "" +} + +// ChecksumRaw returns the decoded checksum from the part. +func (c ObjectPart) ChecksumRaw(t ChecksumType) ([]byte, error) { + b64 := c.Checksum(t) + if b64 == "" { + return nil, errors.New("no checksum set") + } + decoded, err := base64.StdEncoding.DecodeString(b64) + if err != nil { + return nil, err + } + if len(decoded) != t.RawByteLen() { + return nil, errors.New("checksum length mismatch") + } + return decoded, nil } // ListObjectPartsResult container for ListObjectParts response. @@ -296,6 +332,12 @@ type ListObjectPartsResult struct { NextPartNumberMarker int MaxParts int + // ChecksumAlgorithm will be CRC32, CRC32C, etc. + ChecksumAlgorithm string + + // ChecksumType is FULL_OBJECT or COMPOSITE (assume COMPOSITE when unset) + ChecksumType string + // Indicates whether the returned list of parts is truncated. IsTruncated bool ObjectParts []ObjectPart `xml:"Part"` @@ -320,10 +362,11 @@ type completeMultipartUploadResult struct { ETag string // Checksum values, hash of hashes of parts. - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string } // CompletePart sub container lists individual part numbers and their @@ -334,10 +377,11 @@ type CompletePart struct { ETag string // Checksum values - ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"` - ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"` - ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"` - ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"` + ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"` + ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"` + ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"` + ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"` + ChecksumCRC64NVME string `xml:",omitempty"` } // Checksum will return the checksum for the given type. @@ -352,6 +396,8 @@ func (c CompletePart) Checksum(t ChecksumType) string { return c.ChecksumSHA1 case t.Is(ChecksumSHA256): return c.ChecksumSHA256 + case t.Is(ChecksumCRC64NVME): + return c.ChecksumCRC64NVME } return "" } diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go index be28e3f..cb46816 100644 --- a/vendor/github.com/minio/minio-go/v7/api.go +++ b/vendor/github.com/minio/minio-go/v7/api.go @@ -99,6 +99,7 @@ type Client struct { healthStatus int32 trailingHeaderSupport bool + maxRetries int } // Options for New method @@ -123,12 +124,16 @@ type Options struct { // Custom hash routines. Leave nil to use standard. CustomMD5 func() md5simd.Hasher CustomSHA256 func() md5simd.Hasher + + // Number of times a request is retried. Defaults to 10 retries if this option is not configured. + // Set to 1 to disable retries. + MaxRetries int } // Global constants. const ( libraryName = "minio-go" - libraryVersion = "v7.0.76" + libraryVersion = "v7.0.83" ) // User Agent should always following the below style. @@ -278,6 +283,11 @@ func privateNew(endpoint string, opts *Options) (*Client, error) { // healthcheck is not initialized clnt.healthStatus = unknown + clnt.maxRetries = MaxRetry + if opts.MaxRetries > 0 { + clnt.maxRetries = opts.MaxRetries + } + // Return. return clnt, nil } @@ -592,7 +602,7 @@ func (c *Client) executeMethod(ctx context.Context, method string, metadata requ var retryable bool // Indicates if request can be retried. var bodySeeker io.Seeker // Extracted seeker from io.Reader. - reqRetry := MaxRetry // Indicates how many times we can retry the request + reqRetry := c.maxRetries // Indicates how many times we can retry the request if metadata.contentBody != nil { // Check if body is seekable then it is retryable. @@ -661,7 +671,7 @@ func (c *Client) executeMethod(ctx context.Context, method string, metadata requ // Initiate the request. res, err = c.do(req) if err != nil { - if isRequestErrorRetryable(err) { + if isRequestErrorRetryable(ctx, err) { // Retry the request continue } @@ -798,7 +808,7 @@ func (c *Client) newRequest(ctx context.Context, method string, metadata request } // Get credentials from the configured credentials provider. - value, err := c.credsProvider.Get() + value, err := c.credsProvider.GetWithContext(c.CredContext()) if err != nil { return nil, err } @@ -1008,3 +1018,15 @@ func (c *Client) isVirtualHostStyleRequest(url url.URL, bucketName string) bool // path style requests return s3utils.IsVirtualHostSupported(url, bucketName) } + +// CredContext returns the context for fetching credentials +func (c *Client) CredContext() *credentials.CredContext { + httpClient := c.httpClient + if httpClient == nil { + httpClient = http.DefaultClient + } + return &credentials.CredContext{ + Client: httpClient, + Endpoint: c.endpointURL.String(), + } +} diff --git a/vendor/github.com/minio/minio-go/v7/bucket-cache.go b/vendor/github.com/minio/minio-go/v7/bucket-cache.go index b1d3b38..4e4305a 100644 --- a/vendor/github.com/minio/minio-go/v7/bucket-cache.go +++ b/vendor/github.com/minio/minio-go/v7/bucket-cache.go @@ -212,7 +212,7 @@ func (c *Client) getBucketLocationRequest(ctx context.Context, bucketName string c.setUserAgent(req) // Get credentials from the configured credentials provider. - value, err := c.credsProvider.Get() + value, err := c.credsProvider.GetWithContext(c.CredContext()) if err != nil { return nil, err } diff --git a/vendor/github.com/minio/minio-go/v7/checksum.go b/vendor/github.com/minio/minio-go/v7/checksum.go index 7eb1bf2..8e4c27c 100644 --- a/vendor/github.com/minio/minio-go/v7/checksum.go +++ b/vendor/github.com/minio/minio-go/v7/checksum.go @@ -21,11 +21,15 @@ import ( "crypto/sha1" "crypto/sha256" "encoding/base64" + "encoding/binary" + "errors" "hash" "hash/crc32" + "hash/crc64" "io" "math/bits" "net/http" + "sort" ) // ChecksumType contains information about the checksum type. @@ -41,23 +45,41 @@ const ( ChecksumCRC32 // ChecksumCRC32C indicates a CRC32 checksum with Castagnoli table. ChecksumCRC32C + // ChecksumCRC64NVME indicates CRC64 with 0xad93d23594c93659 polynomial. + ChecksumCRC64NVME // Keep after all valid checksums checksumLast + // ChecksumFullObject is a modifier that can be used on CRC32 and CRC32C + // to indicate full object checksums. + ChecksumFullObject + // checksumMask is a mask for valid checksum types. checksumMask = checksumLast - 1 // ChecksumNone indicates no checksum. ChecksumNone ChecksumType = 0 - amzChecksumAlgo = "x-amz-checksum-algorithm" - amzChecksumCRC32 = "x-amz-checksum-crc32" - amzChecksumCRC32C = "x-amz-checksum-crc32c" - amzChecksumSHA1 = "x-amz-checksum-sha1" - amzChecksumSHA256 = "x-amz-checksum-sha256" + // ChecksumFullObjectCRC32 indicates full object CRC32 + ChecksumFullObjectCRC32 = ChecksumCRC32 | ChecksumFullObject + + // ChecksumFullObjectCRC32C indicates full object CRC32C + ChecksumFullObjectCRC32C = ChecksumCRC32C | ChecksumFullObject + + amzChecksumAlgo = "x-amz-checksum-algorithm" + amzChecksumCRC32 = "x-amz-checksum-crc32" + amzChecksumCRC32C = "x-amz-checksum-crc32c" + amzChecksumSHA1 = "x-amz-checksum-sha1" + amzChecksumSHA256 = "x-amz-checksum-sha256" + amzChecksumCRC64NVME = "x-amz-checksum-crc64nvme" ) +// Base returns the base type, without modifiers. +func (c ChecksumType) Base() ChecksumType { + return c & checksumMask +} + // Is returns if c is all of t. func (c ChecksumType) Is(t ChecksumType) bool { return c&t == t @@ -75,10 +97,39 @@ func (c ChecksumType) Key() string { return amzChecksumSHA1 case ChecksumSHA256: return amzChecksumSHA256 + case ChecksumCRC64NVME: + return amzChecksumCRC64NVME } return "" } +// CanComposite will return if the checksum type can be used for composite multipart upload on AWS. +func (c ChecksumType) CanComposite() bool { + switch c & checksumMask { + case ChecksumSHA256, ChecksumSHA1, ChecksumCRC32, ChecksumCRC32C: + return true + } + return false +} + +// CanMergeCRC will return if the checksum type can be used for multipart upload on AWS. +func (c ChecksumType) CanMergeCRC() bool { + switch c & checksumMask { + case ChecksumCRC32, ChecksumCRC32C, ChecksumCRC64NVME: + return true + } + return false +} + +// FullObjectRequested will return if the checksum type indicates full object checksum was requested. +func (c ChecksumType) FullObjectRequested() bool { + switch c & (ChecksumFullObject | checksumMask) { + case ChecksumFullObjectCRC32C, ChecksumFullObjectCRC32, ChecksumCRC64NVME: + return true + } + return false +} + // KeyCapitalized returns the capitalized key as used in HTTP headers. func (c ChecksumType) KeyCapitalized() string { return http.CanonicalHeaderKey(c.Key()) @@ -93,10 +144,17 @@ func (c ChecksumType) RawByteLen() int { return sha1.Size case ChecksumSHA256: return sha256.Size + case ChecksumCRC64NVME: + return crc64.Size } return 0 } +const crc64NVMEPolynomial = 0xad93d23594c93659 + +// crc64 uses reversed polynomials. +var crc64Table = crc64.MakeTable(bits.Reverse64(crc64NVMEPolynomial)) + // Hasher returns a hasher corresponding to the checksum type. // Returns nil if no checksum. func (c ChecksumType) Hasher() hash.Hash { @@ -109,13 +167,15 @@ func (c ChecksumType) Hasher() hash.Hash { return sha1.New() case ChecksumSHA256: return sha256.New() + case ChecksumCRC64NVME: + return crc64.New(crc64Table) } return nil } // IsSet returns whether the type is valid and known. func (c ChecksumType) IsSet() bool { - return bits.OnesCount32(uint32(c)) == 1 + return bits.OnesCount32(uint32(c&checksumMask)) == 1 } // SetDefault will set the checksum if not already set. @@ -125,6 +185,16 @@ func (c *ChecksumType) SetDefault(t ChecksumType) { } } +// EncodeToString the encoded hash value of the content provided in b. +func (c ChecksumType) EncodeToString(b []byte) string { + if !c.IsSet() { + return "" + } + h := c.Hasher() + h.Write(b) + return base64.StdEncoding.EncodeToString(h.Sum(nil)) +} + // String returns the type as a string. // CRC32, CRC32C, SHA1, and SHA256 for valid values. // Empty string for unset and "" if not valid. @@ -140,6 +210,8 @@ func (c ChecksumType) String() string { return "SHA256" case ChecksumNone: return "" + case ChecksumCRC64NVME: + return "CRC64NVME" } return "" } @@ -221,3 +293,129 @@ func (c Checksum) Raw() []byte { } return c.r } + +// CompositeChecksum returns the composite checksum of all provided parts. +func (c ChecksumType) CompositeChecksum(p []ObjectPart) (*Checksum, error) { + if !c.CanComposite() { + return nil, errors.New("cannot do composite checksum") + } + sort.Slice(p, func(i, j int) bool { + return p[i].PartNumber < p[j].PartNumber + }) + c = c.Base() + crcBytes := make([]byte, 0, len(p)*c.RawByteLen()) + for _, part := range p { + pCrc, err := part.ChecksumRaw(c) + if err != nil { + return nil, err + } + crcBytes = append(crcBytes, pCrc...) + } + h := c.Hasher() + h.Write(crcBytes) + return &Checksum{Type: c, r: h.Sum(nil)}, nil +} + +// FullObjectChecksum will return the full object checksum from provided parts. +func (c ChecksumType) FullObjectChecksum(p []ObjectPart) (*Checksum, error) { + if !c.CanMergeCRC() { + return nil, errors.New("cannot merge this checksum type") + } + c = c.Base() + sort.Slice(p, func(i, j int) bool { + return p[i].PartNumber < p[j].PartNumber + }) + + switch len(p) { + case 0: + return nil, errors.New("no parts given") + case 1: + check, err := p[0].ChecksumRaw(c) + if err != nil { + return nil, err + } + return &Checksum{ + Type: c, + r: check, + }, nil + } + var merged uint32 + var merged64 uint64 + first, err := p[0].ChecksumRaw(c) + if err != nil { + return nil, err + } + sz := p[0].Size + switch c { + case ChecksumCRC32, ChecksumCRC32C: + merged = binary.BigEndian.Uint32(first) + case ChecksumCRC64NVME: + merged64 = binary.BigEndian.Uint64(first) + } + + poly32 := uint32(crc32.IEEE) + if c.Is(ChecksumCRC32C) { + poly32 = crc32.Castagnoli + } + for _, part := range p[1:] { + if part.Size == 0 { + continue + } + sz += part.Size + pCrc, err := part.ChecksumRaw(c) + if err != nil { + return nil, err + } + switch c { + case ChecksumCRC32, ChecksumCRC32C: + merged = crc32Combine(poly32, merged, binary.BigEndian.Uint32(pCrc), part.Size) + case ChecksumCRC64NVME: + merged64 = crc64Combine(bits.Reverse64(crc64NVMEPolynomial), merged64, binary.BigEndian.Uint64(pCrc), part.Size) + } + } + var tmp [8]byte + switch c { + case ChecksumCRC32, ChecksumCRC32C: + binary.BigEndian.PutUint32(tmp[:], merged) + return &Checksum{ + Type: c, + r: tmp[:4], + }, nil + case ChecksumCRC64NVME: + binary.BigEndian.PutUint64(tmp[:], merged64) + return &Checksum{ + Type: c, + r: tmp[:8], + }, nil + default: + return nil, errors.New("unknown checksum type") + } +} + +func addAutoChecksumHeaders(opts *PutObjectOptions) { + if opts.UserMetadata == nil { + opts.UserMetadata = make(map[string]string, 1) + } + opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + if opts.AutoChecksum.FullObjectRequested() { + opts.UserMetadata["X-Amz-Checksum-Type"] = "FULL_OBJECT" + } +} + +func applyAutoChecksum(opts *PutObjectOptions, allParts []ObjectPart) { + if !opts.AutoChecksum.IsSet() { + return + } + if opts.AutoChecksum.CanComposite() && !opts.AutoChecksum.Is(ChecksumFullObject) { + // Add composite hash of hashes. + crc, err := opts.AutoChecksum.CompositeChecksum(allParts) + if err == nil { + opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): crc.Encoded()} + } + } else if opts.AutoChecksum.CanMergeCRC() { + crc, err := opts.AutoChecksum.FullObjectChecksum(allParts) + if err == nil { + opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): crc.Encoded(), "X-Amz-Checksum-Type": "FULL_OBJECT"} + } + } +} diff --git a/vendor/github.com/minio/minio-go/v7/functional_tests.go b/vendor/github.com/minio/minio-go/v7/functional_tests.go index 8a908e3..33e87e6 100644 --- a/vendor/github.com/minio/minio-go/v7/functional_tests.go +++ b/vendor/github.com/minio/minio-go/v7/functional_tests.go @@ -83,7 +83,7 @@ func createHTTPTransport() (transport *http.Transport) { return nil } - if mustParseBool(os.Getenv(skipCERTValidation)) { + if mustParseBool(os.Getenv(enableHTTPS)) && mustParseBool(os.Getenv(skipCERTValidation)) { transport.TLSClientConfig.InsecureSkipVerify = true } @@ -160,7 +160,7 @@ func logError(testName, function string, args map[string]interface{}, startTime } else { logFailure(testName, function, args, startTime, alert, message, err) if !isRunOnFail() { - panic(err) + panic(fmt.Sprintf("Test failed with message: %s, err: %v", message, err)) } } } @@ -393,6 +393,42 @@ func getFuncNameLoc(caller int) string { return strings.TrimPrefix(runtime.FuncForPC(pc).Name(), "main.") } +type ClientConfig struct { + // MinIO client configuration + TraceOn bool // Turn on tracing of HTTP requests and responses to stderr + CredsV2 bool // Use V2 credentials if true, otherwise use v4 + TrailingHeaders bool // Send trailing headers in requests +} + +func NewClient(config ClientConfig) (*minio.Client, error) { + // Instantiate new MinIO client + var creds *credentials.Credentials + if config.CredsV2 { + creds = credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), "") + } else { + creds = credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), "") + } + opts := &minio.Options{ + Creds: creds, + Transport: createHTTPTransport(), + Secure: mustParseBool(os.Getenv(enableHTTPS)), + TrailingHeaders: config.TrailingHeaders, + } + client, err := minio.New(os.Getenv(serverEndpoint), opts) + if err != nil { + return nil, err + } + + if config.TraceOn { + client.TraceOn(os.Stderr) + } + + // Set user agent. + client.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + + return client, nil +} + // Tests bucket re-create errors. func testMakeBucketError() { region := "eu-central-1" @@ -407,27 +443,12 @@ func testMakeBucketError() { "region": region, } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -462,20 +483,12 @@ func testMetadataSizeLimit() { "objectName": "", "opts.UserMetadata": "", } - rand.Seed(startTime.Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -531,27 +544,12 @@ func testMakeBucketRegions() { "region": region, } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -598,27 +596,12 @@ func testPutObjectReadAt() { "opts": "objectContentType", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -697,27 +680,12 @@ func testListObjectVersions() { "recursive": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -817,27 +785,12 @@ func testStatObjectWithVersioning() { function := "StatObject" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -935,27 +888,12 @@ func testGetObjectWithVersioning() { function := "GetObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1075,27 +1013,12 @@ func testPutObjectWithVersioning() { function := "GetObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1223,28 +1146,12 @@ func testListMultipartUpload() { function := "GetObject()" args := map[string]interface{}{} - // Instantiate new minio client object. - opts := &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - } - c, err := minio.New(os.Getenv(serverEndpoint), opts) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - core, err := minio.NewCore(os.Getenv(serverEndpoint), opts) - if err != nil { - logError(testName, function, args, startTime, "", "MinIO core client object creation failed", err) - return - } - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + core := minio.Core{Client: c} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -1347,27 +1254,12 @@ func testCopyObjectWithVersioning() { function := "CopyObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1485,27 +1377,12 @@ func testConcurrentCopyObjectWithVersioning() { function := "CopyObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1646,27 +1523,12 @@ func testComposeObjectWithVersioning() { function := "ComposeObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1787,27 +1649,12 @@ func testRemoveObjectWithVersioning() { function := "DeleteObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1900,27 +1747,12 @@ func testRemoveObjectsWithVersioning() { function := "DeleteObjects()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1996,27 +1828,12 @@ func testObjectTaggingWithVersioning() { function := "{Get,Set,Remove}ObjectTagging()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2164,27 +1981,12 @@ func testPutObjectWithChecksums() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2204,9 +2006,13 @@ func testPutObjectWithChecksums() { {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, {cs: minio.ChecksumSHA256}, + {cs: minio.ChecksumCRC64NVME}, } for _, test := range tests { + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } bufSize := dataFileMap["datafile-10-kB"] // Save the data @@ -2230,7 +2036,7 @@ func testPutObjectWithChecksums() { h := test.cs.Hasher() h.Reset() - // Test with Wrong CRC. + // Test with a bad CRC - we haven't called h.Write(b), so this is a checksum of empty data meta[test.cs.Key()] = base64.StdEncoding.EncodeToString(h.Sum(nil)) args["metadata"] = meta args["range"] = "false" @@ -2263,6 +2069,7 @@ func testPutObjectWithChecksums() { cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) // Read the data back gopts := minio.GetObjectOptions{Checksum: true} @@ -2282,6 +2089,7 @@ func testPutObjectWithChecksums() { cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(st.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) if st.Size != int64(bufSize) { logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) @@ -2325,16 +2133,16 @@ func testPutObjectWithChecksums() { cmpChecksum(st.ChecksumSHA1, "") cmpChecksum(st.ChecksumCRC32, "") cmpChecksum(st.ChecksumCRC32C, "") + cmpChecksum(st.ChecksumCRC64NVME, "") delete(args, "range") delete(args, "metadata") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with custom checksums. -func testPutMultipartObjectWithChecksums() { +func testPutObjectWithTrailingChecksums() { // initialize logging params startTime := time.Now() testName := getFuncName() @@ -2342,7 +2150,7 @@ func testPutMultipartObjectWithChecksums() { args := map[string]interface{}{ "bucketName": "", "objectName": "", - "opts": "minio.PutObjectOptions{UserMetadata: metadata, Progress: progress}", + "opts": "minio.PutObjectOptions{UserMetadata: metadata, Progress: progress, TrailChecksum: xxx}", } if !isFullMode() { @@ -2350,26 +2158,191 @@ func testPutMultipartObjectWithChecksums() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) + // Generate a new random bucket name. + bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") + args["bucketName"] = bucketName - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + // Make a new bucket. + err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1"}) + if err != nil { + logError(testName, function, args, startTime, "", "Make bucket failed", err) + return + } + + defer cleanupBucket(bucketName, c) + tests := []struct { + cs minio.ChecksumType + }{ + {cs: minio.ChecksumCRC64NVME}, + {cs: minio.ChecksumCRC32C}, + {cs: minio.ChecksumCRC32}, + {cs: minio.ChecksumSHA1}, + {cs: minio.ChecksumSHA256}, + } + for _, test := range tests { + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } + function := "PutObject(bucketName, objectName, reader,size, opts)" + bufSize := dataFileMap["datafile-10-kB"] + + // Save the data + objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") + args["objectName"] = objectName + + cmpChecksum := func(got, want string) { + if want != got { + logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got)) + return + } + } + + meta := map[string]string{} + reader := getDataReader("datafile-10-kB") + b, err := io.ReadAll(reader) + if err != nil { + logError(testName, function, args, startTime, "", "Read failed", err) + return + } + h := test.cs.Hasher() + h.Reset() + + // Test with Wrong CRC. + args["metadata"] = meta + args["range"] = "false" + args["checksum"] = test.cs.String() + + resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), minio.PutObjectOptions{ + DisableMultipart: true, + DisableContentSha256: true, + UserMetadata: meta, + Checksum: test.cs, + }) + if err != nil { + logError(testName, function, args, startTime, "", "PutObject failed", err) + return + } + + h.Write(b) + meta[test.cs.Key()] = base64.StdEncoding.EncodeToString(h.Sum(nil)) + + cmpChecksum(resp.ChecksumSHA256, meta["x-amz-checksum-sha256"]) + cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"]) + cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"]) + cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) + + // Read the data back + gopts := minio.GetObjectOptions{Checksum: true} + + function = "GetObject(...)" + r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) + if err != nil { + logError(testName, function, args, startTime, "", "GetObject failed", err) + return + } + + st, err := r.Stat() + if err != nil { + logError(testName, function, args, startTime, "", "Stat failed", err) + return + } + cmpChecksum(st.ChecksumSHA256, meta["x-amz-checksum-sha256"]) + cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"]) + cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"]) + cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) + + if st.Size != int64(bufSize) { + logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) + return + } + + if err := r.Close(); err != nil { + logError(testName, function, args, startTime, "", "Object Close failed", err) + return + } + if err := r.Close(); err == nil { + logError(testName, function, args, startTime, "", "Object already closed, should respond with error", err) + return + } + + function = "GetObject( Range...)" + args["range"] = "true" + err = gopts.SetRange(100, 1000) + if err != nil { + logError(testName, function, args, startTime, "", "SetRange failed", err) + return + } + r, err = c.GetObject(context.Background(), bucketName, objectName, gopts) + if err != nil { + logError(testName, function, args, startTime, "", "GetObject failed", err) + return + } + + b, err = io.ReadAll(r) + if err != nil { + logError(testName, function, args, startTime, "", "Read failed", err) + return + } + st, err = r.Stat() + if err != nil { + logError(testName, function, args, startTime, "", "Stat failed", err) + return + } + + // Range requests should return empty checksums... + cmpChecksum(st.ChecksumSHA256, "") + cmpChecksum(st.ChecksumSHA1, "") + cmpChecksum(st.ChecksumCRC32, "") + cmpChecksum(st.ChecksumCRC32C, "") + cmpChecksum(st.ChecksumCRC64NVME, "") + + function = "GetObjectAttributes(...)" + s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{}) + if err != nil { + logError(testName, function, args, startTime, "", "GetObjectAttributes failed", err) + return + } + cmpChecksum(s.Checksum.ChecksumSHA256, meta["x-amz-checksum-sha256"]) + cmpChecksum(s.Checksum.ChecksumSHA1, meta["x-amz-checksum-sha1"]) + cmpChecksum(s.Checksum.ChecksumCRC32, meta["x-amz-checksum-crc32"]) + cmpChecksum(s.Checksum.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + + delete(args, "range") + delete(args, "metadata") + logSuccess(testName, function, args, startTime) + } +} + +// Test PutObject with custom checksums. +func testPutMultipartObjectWithChecksums(trailing bool) { + // initialize logging params + startTime := time.Now() + testName := getFuncName() + function := "PutObject(bucketName, objectName, reader,size, opts)" + args := map[string]interface{}{ + "bucketName": "", + "objectName": "", + "opts": fmt.Sprintf("minio.PutObjectOptions{UserMetadata: metadata, Trailing: %v}", trailing), + } + + if !isFullMode() { + logIgnored(testName, function, args, startTime, "Skipping functional tests for short/quick runs") + return + } + + c, err := NewClient(ClientConfig{TrailingHeaders: trailing}) + if err != nil { + logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) + return + } // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -2382,14 +2355,18 @@ func testPutMultipartObjectWithChecksums() { return } - hashMultiPart := func(b []byte, partSize int, hasher hash.Hash) string { + hashMultiPart := func(b []byte, partSize int, cs minio.ChecksumType) string { r := bytes.NewReader(b) + hasher := cs.Hasher() + if cs.FullObjectRequested() { + partSize = len(b) + } tmp := make([]byte, partSize) parts := 0 var all []byte for { n, err := io.ReadFull(r, tmp) - if err != nil && err != io.ErrUnexpectedEOF { + if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF { logError(testName, function, args, startTime, "", "Calc crc failed", err) } if n == 0 { @@ -2403,6 +2380,9 @@ func testPutMultipartObjectWithChecksums() { break } } + if parts == 1 { + return base64.StdEncoding.EncodeToString(hasher.Sum(nil)) + } hasher.Reset() hasher.Write(all) return fmt.Sprintf("%s-%d", base64.StdEncoding.EncodeToString(hasher.Sum(nil)), parts) @@ -2411,6 +2391,9 @@ func testPutMultipartObjectWithChecksums() { tests := []struct { cs minio.ChecksumType }{ + {cs: minio.ChecksumFullObjectCRC32}, + {cs: minio.ChecksumFullObjectCRC32C}, + {cs: minio.ChecksumCRC64NVME}, {cs: minio.ChecksumCRC32C}, {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, @@ -2418,8 +2401,12 @@ func testPutMultipartObjectWithChecksums() { } for _, test := range tests { - bufSize := dataFileMap["datafile-129-MB"] + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } + args["section"] = "prep" + bufSize := dataFileMap["datafile-129-MB"] // Save the data objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") args["objectName"] = objectName @@ -2428,7 +2415,7 @@ func testPutMultipartObjectWithChecksums() { cmpChecksum := func(got, want string) { if want != got { logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got)) - //fmt.Printf("want %s, got %s\n", want, got) + // fmt.Printf("want %s, got %s\n", want, got) return } } @@ -2443,23 +2430,31 @@ func testPutMultipartObjectWithChecksums() { reader.Close() h := test.cs.Hasher() h.Reset() - want := hashMultiPart(b, partSize, test.cs.Hasher()) + want := hashMultiPart(b, partSize, test.cs) - // Set correct CRC. + var cs minio.ChecksumType + rd := io.Reader(io.NopCloser(bytes.NewReader(b))) + if trailing { + cs = test.cs + rd = bytes.NewReader(b) + } - resp, err := c.PutObject(context.Background(), bucketName, objectName, io.NopCloser(bytes.NewReader(b)), int64(bufSize), minio.PutObjectOptions{ + // Set correct CRC. + args["section"] = "PutObject" + resp, err := c.PutObject(context.Background(), bucketName, objectName, rd, int64(bufSize), minio.PutObjectOptions{ DisableContentSha256: true, DisableMultipart: false, UserMetadata: nil, PartSize: partSize, AutoChecksum: test.cs, + Checksum: cs, }) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) return } - switch test.cs { + switch test.cs.Base() { case minio.ChecksumCRC32C: cmpChecksum(resp.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2468,15 +2463,41 @@ func testPutMultipartObjectWithChecksums() { cmpChecksum(resp.ChecksumSHA1, want) case minio.ChecksumSHA256: cmpChecksum(resp.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + cmpChecksum(resp.ChecksumCRC64NVME, want) } + args["section"] = "HeadObject" + st, err := c.StatObject(context.Background(), bucketName, objectName, minio.StatObjectOptions{Checksum: true}) + if err != nil { + logError(testName, function, args, startTime, "", "StatObject failed", err) + return + } + switch test.cs.Base() { + case minio.ChecksumCRC32C: + cmpChecksum(st.ChecksumCRC32C, want) + case minio.ChecksumCRC32: + cmpChecksum(st.ChecksumCRC32, want) + case minio.ChecksumSHA1: + cmpChecksum(st.ChecksumSHA1, want) + case minio.ChecksumSHA256: + cmpChecksum(st.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + cmpChecksum(st.ChecksumCRC64NVME, want) + } + + args["section"] = "GetObjectAttributes" s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{}) if err != nil { logError(testName, function, args, startTime, "", "GetObjectAttributes failed", err) return } - want = want[:strings.IndexByte(want, '-')] + + if strings.ContainsRune(want, '-') { + want = want[:strings.IndexByte(want, '-')] + } switch test.cs { + // Full Object CRC does not return anything with GetObjectAttributes case minio.ChecksumCRC32C: cmpChecksum(s.Checksum.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2492,13 +2513,14 @@ func testPutMultipartObjectWithChecksums() { gopts.PartNumber = 2 // We cannot use StatObject, since it ignores partnumber. + args["section"] = "GetObject-Part" r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) if err != nil { logError(testName, function, args, startTime, "", "GetObject failed", err) return } io.Copy(io.Discard, r) - st, err := r.Stat() + st, err = r.Stat() if err != nil { logError(testName, function, args, startTime, "", "Stat failed", err) return @@ -2510,6 +2532,7 @@ func testPutMultipartObjectWithChecksums() { want = base64.StdEncoding.EncodeToString(h.Sum(nil)) switch test.cs { + // Full Object CRC does not return any part CRC for whatever reason. case minio.ChecksumCRC32C: cmpChecksum(st.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2518,12 +2541,17 @@ func testPutMultipartObjectWithChecksums() { cmpChecksum(st.ChecksumSHA1, want) case minio.ChecksumSHA256: cmpChecksum(st.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + // AWS doesn't return part checksum, but may in the future. + if st.ChecksumCRC64NVME != "" { + cmpChecksum(st.ChecksumCRC64NVME, want) + } } delete(args, "metadata") + delete(args, "section") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with trailing checksums. @@ -2543,25 +2571,12 @@ func testTrailingChecksums() { return } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - TrailingHeaders: true, - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2683,7 +2698,6 @@ func testTrailingChecksums() { test.ChecksumCRC32C = hashMultiPart(b, int(test.PO.PartSize), test.hasher) // Set correct CRC. - // c.TraceOn(os.Stderr) resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), test.PO) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) @@ -2734,6 +2748,7 @@ func testTrailingChecksums() { } delete(args, "metadata") + logSuccess(testName, function, args, startTime) } } @@ -2754,25 +2769,12 @@ func testPutObjectWithAutomaticChecksums() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - TrailingHeaders: true, - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2799,8 +2801,6 @@ func testPutObjectWithAutomaticChecksums() { {header: "x-amz-checksum-crc32c", hasher: crc32.New(crc32.MakeTable(crc32.Castagnoli))}, } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) // defer c.TraceOff() for i, test := range tests { @@ -2910,20 +2910,12 @@ func testGetObjectAttributes() { return } - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - TrailingHeaders: true, - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName err = c.MakeBucket( @@ -2982,6 +2974,7 @@ func testGetObjectAttributes() { testFiles[i].UploadInfo, err = c.PutObject(context.Background(), v.Bucket, v.Object, reader, int64(bufSize), minio.PutObjectOptions{ ContentType: v.ContentType, SendContentMd5: v.SendContentMd5, + Checksum: minio.ChecksumCRC32C, }) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) @@ -3063,7 +3056,7 @@ func testGetObjectAttributes() { test: objectAttributesTestOptions{ TestFileName: "file1", StorageClass: "STANDARD", - HasFullChecksum: false, + HasFullChecksum: true, }, } @@ -3116,19 +3109,12 @@ func testGetObjectAttributesSSECEncryption() { return } - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - TrailingHeaders: true, - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName err = c.MakeBucket( @@ -3152,9 +3138,10 @@ func testGetObjectAttributesSSECEncryption() { info, err := c.PutObject(context.Background(), bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{ ContentType: "content/custom", - SendContentMd5: true, + SendContentMd5: false, ServerSideEncryption: sse, PartSize: uint64(bufSize) / 2, + Checksum: minio.ChecksumCRC32C, }) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) @@ -3174,9 +3161,9 @@ func testGetObjectAttributesSSECEncryption() { ETag: info.ETag, NumberOfParts: 2, ObjectSize: int(info.Size), - HasFullChecksum: false, + HasFullChecksum: true, HasParts: true, - HasPartChecksums: false, + HasPartChecksums: true, }) if err != nil { logError(testName, function, args, startTime, "", "Validating GetObjectsAttributes response failed", err) @@ -3201,19 +3188,12 @@ func testGetObjectAttributesErrorCases() { return } - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - TrailingHeaders: true, - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) unknownBucket := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-bucket-") unknownObject := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-object-") @@ -3365,16 +3345,10 @@ func validateObjectAttributeRequest(OA *minio.ObjectAttributes, opts *minio.Obje } } - hasFullObjectChecksum := true - if OA.Checksum.ChecksumCRC32 == "" { - if OA.Checksum.ChecksumCRC32C == "" { - if OA.Checksum.ChecksumSHA1 == "" { - if OA.Checksum.ChecksumSHA256 == "" { - hasFullObjectChecksum = false - } - } - } - } + hasFullObjectChecksum := (OA.Checksum.ChecksumCRC32 != "" || + OA.Checksum.ChecksumCRC32C != "" || + OA.Checksum.ChecksumSHA1 != "" || + OA.Checksum.ChecksumSHA256 != "") if test.HasFullChecksum { if !hasFullObjectChecksum { @@ -3463,27 +3437,12 @@ func testPutObjectWithMetadata() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3570,27 +3529,12 @@ func testPutObjectWithContentLanguage() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3640,27 +3584,12 @@ func testPutObjectStreaming() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3712,27 +3641,12 @@ func testGetObjectSeekEnd() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3835,27 +3749,12 @@ func testGetObjectClosedTwice() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3926,26 +3825,13 @@ func testRemoveObjectsContext() { "bucketName": "", } - // Seed random based on current tie. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Enable tracing, write to stdout. - // c.TraceOn(os.Stderr) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4023,27 +3909,12 @@ func testRemoveMultipleObjects() { "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - - // Enable tracing, write to stdout. - // c.TraceOn(os.Stderr) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4107,27 +3978,12 @@ func testRemoveMultipleObjectsWithResult() { "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - - // Enable tracing, write to stdout. - // c.TraceOn(os.Stderr) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4243,27 +4099,12 @@ func testFPutObjectMultipart() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4349,27 +4190,12 @@ func testFPutObject() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") location := "us-east-1" @@ -4519,27 +4345,13 @@ func testFPutObjectContext() { "fileName": "", "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4620,27 +4432,13 @@ func testFPutObjectContextV2() { "objectName": "", "opts": "minio.PutObjectOptions{ContentType:objectContentType}", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4725,24 +4523,12 @@ func testPutObjectContext() { "opts": "", } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4795,27 +4581,12 @@ func testGetObjectS3Zip() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{"x-minio-extract": true} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4979,27 +4750,12 @@ func testGetObjectReadSeekFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -5149,27 +4905,12 @@ func testGetObjectReadAtFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -5327,27 +5068,12 @@ func testGetObjectReadAtWhenEOFWasReached() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -5447,27 +5173,12 @@ func testPresignedPostPolicy() { "policy": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -5495,50 +5206,22 @@ func testPresignedPostPolicy() { return } - // Save the data - _, err = c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(buf), int64(len(buf)), minio.PutObjectOptions{ContentType: "binary/octet-stream"}) - if err != nil { - logError(testName, function, args, startTime, "", "PutObject failed", err) - return - } - policy := minio.NewPostPolicy() - - if err := policy.SetBucket(""); err == nil { - logError(testName, function, args, startTime, "", "SetBucket did not fail for invalid conditions", err) - return - } - if err := policy.SetKey(""); err == nil { - logError(testName, function, args, startTime, "", "SetKey did not fail for invalid conditions", err) - return - } - if err := policy.SetExpires(time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC)); err == nil { - logError(testName, function, args, startTime, "", "SetExpires did not fail for invalid conditions", err) - return - } - if err := policy.SetContentType(""); err == nil { - logError(testName, function, args, startTime, "", "SetContentType did not fail for invalid conditions", err) - return - } - if err := policy.SetContentLengthRange(1024*1024, 1024); err == nil { - logError(testName, function, args, startTime, "", "SetContentLengthRange did not fail for invalid conditions", err) - return - } - if err := policy.SetUserMetadata("", ""); err == nil { - logError(testName, function, args, startTime, "", "SetUserMetadata did not fail for invalid conditions", err) - return - } - policy.SetBucket(bucketName) policy.SetKey(objectName) policy.SetExpires(time.Now().UTC().AddDate(0, 0, 10)) // expires in 10 days policy.SetContentType("binary/octet-stream") policy.SetContentLengthRange(10, 1024*1024) policy.SetUserMetadata(metadataKey, metadataValue) + policy.SetContentEncoding("gzip") // Add CRC32C checksum := minio.ChecksumCRC32C.ChecksumBytes(buf) - policy.SetChecksum(checksum) + err = policy.SetChecksum(checksum) + if err != nil { + logError(testName, function, args, startTime, "", "SetChecksum failed", err) + return + } args["policy"] = policy.String() @@ -5594,18 +5277,12 @@ func testPresignedPostPolicy() { } writer.Close() - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) - if err != nil { - logError(testName, function, args, startTime, "", "DefaultTransport failed", err) - return - } - httpClient := &http.Client{ // Setting a sensible time out of 30secs to wait for response // headers. Request is pro-actively canceled after 30secs // with no response. Timeout: 30 * time.Second, - Transport: transport, + Transport: createHTTPTransport(), } args["url"] = presignedPostPolicyURL.String() @@ -5640,7 +5317,7 @@ func testPresignedPostPolicy() { expectedLocation := scheme + os.Getenv(serverEndpoint) + "/" + bucketName + "/" + objectName expectedLocationBucketDNS := scheme + bucketName + "." + os.Getenv(serverEndpoint) + "/" + objectName - if !strings.Contains(expectedLocation, "s3.amazonaws.com/") { + if !strings.Contains(expectedLocation, ".amazonaws.com/") { // Test when not against AWS S3. if val, ok := res.Header["Location"]; ok { if val[0] != expectedLocation && val[0] != expectedLocationBucketDNS { @@ -5652,9 +5329,184 @@ func testPresignedPostPolicy() { return } } - want := checksum.Encoded() - if got := res.Header.Get("X-Amz-Checksum-Crc32c"); got != want { - logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %q, got %q", want, got), nil) + wantChecksumCrc32c := checksum.Encoded() + if got := res.Header.Get("X-Amz-Checksum-Crc32c"); got != wantChecksumCrc32c { + logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %q, got %q", wantChecksumCrc32c, got), nil) + return + } + + // Ensure that when we subsequently GetObject, the checksum is returned + gopts := minio.GetObjectOptions{Checksum: true} + r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) + if err != nil { + logError(testName, function, args, startTime, "", "GetObject failed", err) + return + } + st, err := r.Stat() + if err != nil { + logError(testName, function, args, startTime, "", "Stat failed", err) + return + } + if st.ChecksumCRC32C != wantChecksumCrc32c { + logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %s, got %s", wantChecksumCrc32c, st.ChecksumCRC32C), nil) + return + } + + logSuccess(testName, function, args, startTime) +} + +// testPresignedPostPolicyWrongFile tests that when we have a policy with a checksum, we cannot POST the wrong file +func testPresignedPostPolicyWrongFile() { + // initialize logging params + startTime := time.Now() + testName := getFuncName() + function := "PresignedPostPolicy(policy)" + args := map[string]interface{}{ + "policy": "", + } + + c, err := NewClient(ClientConfig{}) + if err != nil { + logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) + return + } + + // Generate a new random bucket name. + bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") + + // Make a new bucket in 'us-east-1' (source bucket). + err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1"}) + if err != nil { + logError(testName, function, args, startTime, "", "MakeBucket failed", err) + return + } + + defer cleanupBucket(bucketName, c) + + objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") + // Azure requires the key to not start with a number + metadataKey := randString(60, rand.NewSource(time.Now().UnixNano()), "user") + metadataValue := randString(60, rand.NewSource(time.Now().UnixNano()), "") + + policy := minio.NewPostPolicy() + policy.SetBucket(bucketName) + policy.SetKey(objectName) + policy.SetExpires(time.Now().UTC().AddDate(0, 0, 10)) // expires in 10 days + policy.SetContentType("binary/octet-stream") + policy.SetContentLengthRange(10, 1024*1024) + policy.SetUserMetadata(metadataKey, metadataValue) + + // Add CRC32C of some data that the policy will explicitly allow. + checksum := minio.ChecksumCRC32C.ChecksumBytes([]byte{0x01, 0x02, 0x03}) + err = policy.SetChecksum(checksum) + if err != nil { + logError(testName, function, args, startTime, "", "SetChecksum failed", err) + return + } + + args["policy"] = policy.String() + + presignedPostPolicyURL, formData, err := c.PresignedPostPolicy(context.Background(), policy) + if err != nil { + logError(testName, function, args, startTime, "", "PresignedPostPolicy failed", err) + return + } + + // At this stage, we have a policy that allows us to upload for a specific checksum. + // Test that uploading datafile-10-kB, with a different checksum, fails as expected + filePath := getMintDataDirFilePath("datafile-10-kB") + if filePath == "" { + // Make a temp file with 10 KB data. + file, err := os.CreateTemp(os.TempDir(), "PresignedPostPolicyTest") + if err != nil { + logError(testName, function, args, startTime, "", "TempFile creation failed", err) + return + } + if _, err = io.Copy(file, getDataReader("datafile-10-kB")); err != nil { + logError(testName, function, args, startTime, "", "Copy failed", err) + return + } + if err = file.Close(); err != nil { + logError(testName, function, args, startTime, "", "File Close failed", err) + return + } + filePath = file.Name() + } + fileReader := getDataReader("datafile-10-kB") + defer fileReader.Close() + buf10k, err := io.ReadAll(fileReader) + if err != nil { + logError(testName, function, args, startTime, "", "ReadAll failed", err) + return + } + otherChecksum := minio.ChecksumCRC32C.ChecksumBytes(buf10k) + + var formBuf bytes.Buffer + writer := multipart.NewWriter(&formBuf) + for k, v := range formData { + if k == "x-amz-checksum-crc32c" { + v = otherChecksum.Encoded() + } + writer.WriteField(k, v) + } + + // Add file to post request + f, err := os.Open(filePath) + defer f.Close() + if err != nil { + logError(testName, function, args, startTime, "", "File open failed", err) + return + } + w, err := writer.CreateFormFile("file", filePath) + if err != nil { + logError(testName, function, args, startTime, "", "CreateFormFile failed", err) + return + } + _, err = io.Copy(w, f) + if err != nil { + logError(testName, function, args, startTime, "", "Copy failed", err) + return + } + writer.Close() + + httpClient := &http.Client{ + Timeout: 30 * time.Second, + Transport: createHTTPTransport(), + } + args["url"] = presignedPostPolicyURL.String() + + req, err := http.NewRequest(http.MethodPost, presignedPostPolicyURL.String(), bytes.NewReader(formBuf.Bytes())) + if err != nil { + logError(testName, function, args, startTime, "", "HTTP request failed", err) + return + } + + req.Header.Set("Content-Type", writer.FormDataContentType()) + + // Make the POST request with the form data. + res, err := httpClient.Do(req) + if err != nil { + logError(testName, function, args, startTime, "", "HTTP request failed", err) + return + } + defer res.Body.Close() + if res.StatusCode != http.StatusForbidden { + logError(testName, function, args, startTime, "", "HTTP request unexpected status", errors.New(res.Status)) + return + } + + // Read the response body, ensure it has checksum failure message + resBody, err := io.ReadAll(res.Body) + if err != nil { + logError(testName, function, args, startTime, "", "ReadAll failed", err) + return + } + + // Normalize the response body, because S3 uses quotes around the policy condition components + // in the error message, MinIO does not. + resBodyStr := strings.ReplaceAll(string(resBody), `"`, "") + if !strings.Contains(resBodyStr, "Policy Condition failed: [eq, $x-amz-checksum-crc32c, 8TDyHg=") { + logError(testName, function, args, startTime, "", "Unexpected response body", errors.New(resBodyStr)) return } @@ -5669,27 +5521,12 @@ func testCopyObject() { function := "CopyObject(dst, src)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -5864,27 +5701,12 @@ func testSSECEncryptedGetObjectReadSeekFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6047,27 +5869,12 @@ func testSSES3EncryptedGetObjectReadSeekFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6228,27 +6035,12 @@ func testSSECEncryptedGetObjectReadAtFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6412,27 +6204,12 @@ func testSSES3EncryptedGetObjectReadAtFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6597,27 +6374,13 @@ func testSSECEncryptionPutGet() { "objectName": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6707,27 +6470,13 @@ func testSSECEncryptionFPut() { "contentType": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6830,27 +6579,13 @@ func testSSES3EncryptionPutGet() { "objectName": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6938,27 +6673,13 @@ func testSSES3EncryptionFPut() { "contentType": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -7067,26 +6788,12 @@ func testBucketNotification() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable to debug - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - bucketName := os.Getenv("NOTIFY_BUCKET") args["bucketName"] = bucketName @@ -7162,26 +6869,12 @@ func testFunctional() { functionAll := "" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, nil, startTime, "", "MinIO client object creation failed", err) return } - // Enable to debug - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -7519,7 +7212,7 @@ func testFunctional() { return } - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) + transport := createHTTPTransport() if err != nil { logError(testName, function, args, startTime, "", "DefaultTransport failed", err) return @@ -7841,24 +7534,12 @@ func testGetObjectModified() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -7937,24 +7618,12 @@ func testPutObjectUploadSeekedObject() { "contentType": "binary/octet-stream", } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8057,27 +7726,12 @@ func testMakeBucketErrorV2() { "region": "eu-west-1", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") region := "eu-west-1" @@ -8117,27 +7771,12 @@ func testGetObjectClosedTwiceV2() { "region": "eu-west-1", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8208,27 +7847,12 @@ func testFPutObjectV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8369,27 +7993,12 @@ func testMakeBucketRegionsV2() { "region": "eu-west-1", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8432,27 +8041,12 @@ func testGetObjectReadSeekFunctionalV2() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8587,27 +8181,12 @@ func testGetObjectReadAtFunctionalV2() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8749,27 +8328,12 @@ func testCopyObjectV2() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -8968,13 +8532,7 @@ func testComposeObjectErrorCasesV2() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9066,13 +8624,7 @@ func testCompose10KSourcesV2() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9088,13 +8640,7 @@ func testEncryptedEmptyObject() { function := "PutObject(bucketName, objectName, reader, objectSize, opts)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -9242,7 +8788,7 @@ func testEncryptedCopyObjectWrapper(c *minio.Client, bucketName string, sseSrc, dstEncryption = sseDst } // 3. get copied object and check if content is equal - coreClient := minio.Core{c} + coreClient := minio.Core{Client: c} reader, _, _, err := coreClient.GetObject(context.Background(), bucketName, "dstObject", minio.GetObjectOptions{ServerSideEncryption: dstEncryption}) if err != nil { logError(testName, function, args, startTime, "", "GetObject failed", err) @@ -9349,13 +8895,7 @@ func testUnencryptedToSSECCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9364,7 +8904,6 @@ func testUnencryptedToSSECCopyObject() { bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, nil, sseDst) } @@ -9376,13 +8915,7 @@ func testUnencryptedToSSES3CopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9392,7 +8925,6 @@ func testUnencryptedToSSES3CopyObject() { var sseSrc encrypt.ServerSide sseDst := encrypt.NewSSE() - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9404,13 +8936,7 @@ func testUnencryptedToUnencryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9419,7 +8945,6 @@ func testUnencryptedToUnencryptedCopyObject() { bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") var sseSrc, sseDst encrypt.ServerSide - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9431,13 +8956,7 @@ func testEncryptedSSECToSSECCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9447,7 +8966,6 @@ func testEncryptedSSECToSSECCopyObject() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9459,13 +8977,7 @@ func testEncryptedSSECToSSES3CopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9475,7 +8987,6 @@ func testEncryptedSSECToSSES3CopyObject() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) sseDst := encrypt.NewSSE() - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9487,13 +8998,7 @@ func testEncryptedSSECToUnencryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9503,7 +9008,6 @@ func testEncryptedSSECToUnencryptedCopyObject() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) var sseDst encrypt.ServerSide - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9515,13 +9019,7 @@ func testEncryptedSSES3ToSSECCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9531,7 +9029,6 @@ func testEncryptedSSES3ToSSECCopyObject() { sseSrc := encrypt.NewSSE() sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9543,13 +9040,7 @@ func testEncryptedSSES3ToSSES3CopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9559,7 +9050,6 @@ func testEncryptedSSES3ToSSES3CopyObject() { sseSrc := encrypt.NewSSE() sseDst := encrypt.NewSSE() - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9571,13 +9061,7 @@ func testEncryptedSSES3ToUnencryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9587,7 +9071,6 @@ func testEncryptedSSES3ToUnencryptedCopyObject() { sseSrc := encrypt.NewSSE() var sseDst encrypt.ServerSide - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9599,13 +9082,7 @@ func testEncryptedCopyObjectV2() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9615,7 +9092,6 @@ func testEncryptedCopyObjectV2() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9626,13 +9102,7 @@ func testDecryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9686,26 +9156,14 @@ func testSSECMultipartEncryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -9884,26 +9342,14 @@ func testSSECEncryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10062,26 +9508,14 @@ func testSSECEncryptedToUnencryptedCopyPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10239,26 +9673,14 @@ func testSSECEncryptedToSSES3CopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10419,26 +9841,14 @@ func testUnencryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10594,26 +10004,14 @@ func testUnencryptedToUnencryptedCopyPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10765,26 +10163,14 @@ func testUnencryptedToSSES3CopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10938,26 +10324,14 @@ func testSSES3EncryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -11114,26 +10488,14 @@ func testSSES3EncryptedToUnencryptedCopyPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -11286,26 +10648,14 @@ func testSSES3EncryptedToSSES3CopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -11460,19 +10810,12 @@ func testUserMetadataCopying() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // c.TraceOn(os.Stderr) testUserMetadataCopyingWrapper(c) } @@ -11637,19 +10980,12 @@ func testUserMetadataCopyingV2() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) return } - // c.TraceOn(os.Stderr) testUserMetadataCopyingWrapper(c) } @@ -11660,13 +10996,7 @@ func testStorageClassMetadataPutObject() { args := map[string]interface{}{} testName := getFuncName() - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -11748,13 +11078,7 @@ func testStorageClassInvalidMetadataPutObject() { args := map[string]interface{}{} testName := getFuncName() - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -11791,13 +11115,7 @@ func testStorageClassMetadataCopyObject() { args := map[string]interface{}{} testName := getFuncName() - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -11918,27 +11236,12 @@ func testPutObjectNoLengthV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -11994,27 +11297,12 @@ func testPutObjectsUnknownV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12085,27 +11373,12 @@ func testPutObject0ByteV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12150,13 +11423,7 @@ func testComposeObjectErrorCases() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return @@ -12173,13 +11440,7 @@ func testCompose10KSources() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return @@ -12197,26 +11458,12 @@ func testFunctionalV2() { functionAll := "" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) return } - // Enable to debug - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") location := "us-east-1" @@ -12450,18 +11697,12 @@ func testFunctionalV2() { return } - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) - if err != nil { - logError(testName, function, args, startTime, "", "DefaultTransport failed", err) - return - } - httpClient := &http.Client{ // Setting a sensible time out of 30secs to wait for response // headers. Request is pro-actively canceled after 30secs // with no response. Timeout: 30 * time.Second, - Transport: transport, + Transport: createHTTPTransport(), } req, err := http.NewRequest(http.MethodHead, presignedHeadURL.String(), nil) @@ -12656,27 +11897,13 @@ func testGetObjectContext() { "bucketName": "", "objectName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12759,27 +11986,13 @@ func testFGetObjectContext() { "objectName": "", "fileName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12851,24 +12064,12 @@ func testGetObjectRanges() { defer cancel() rng := rand.NewSource(time.Now().UnixNano()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rng, "minio-go-test-") args["bucketName"] = bucketName @@ -12958,27 +12159,13 @@ func testGetObjectACLContext() { "bucketName": "", "objectName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13136,24 +12323,12 @@ func testPutObjectContextV2() { "size": "", "opts": "", } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13208,27 +12383,13 @@ func testGetObjectContextV2() { "bucketName": "", "objectName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13309,27 +12470,13 @@ func testFGetObjectContextV2() { "objectName": "", "fileName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13398,27 +12545,13 @@ func testListObjects() { "objectPrefix": "", "recursive": "true", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13502,24 +12635,12 @@ func testCors() { "cors": "", } - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Create or reuse a bucket that will get cors settings applied to it and deleted when done bucketName := os.Getenv("MINIO_GO_TEST_BUCKET_CORS") if bucketName == "" { @@ -13556,14 +12677,9 @@ func testCors() { bucketURL := c.EndpointURL().String() + "/" + bucketName + "/" objectURL := bucketURL + objectName - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) - if err != nil { - logError(testName, function, args, startTime, "", "DefaultTransport failed", err) - return - } httpClient := &http.Client{ Timeout: 30 * time.Second, - Transport: transport, + Transport: createHTTPTransport(), } errStrAccessForbidden := `AccessForbiddenCORSResponse: This CORS request is not allowed. This is usually because the evalution of Origin, request method / Access-Control-Request-Method or Access-Control-Request-Headers are not whitelisted` @@ -14243,24 +13359,12 @@ func testCorsSetGetDelete() { "cors": "", } - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14342,27 +13446,13 @@ func testRemoveObjects() { "objectPrefix": "", "recursive": "true", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14476,27 +13566,13 @@ func testGetBucketTagging() { args := map[string]interface{}{ "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14532,27 +13608,13 @@ func testSetBucketTagging() { "bucketName": "", "tags": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14618,27 +13680,13 @@ func testRemoveBucketTagging() { args := map[string]interface{}{ "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14757,7 +13805,9 @@ func main() { testCompose10KSourcesV2() testUserMetadataCopyingV2() testPutObjectWithChecksums() - testPutMultipartObjectWithChecksums() + testPutObjectWithTrailingChecksums() + testPutMultipartObjectWithChecksums(false) + testPutMultipartObjectWithChecksums(true) testPutObject0ByteV2() testPutObjectNoLengthV2() testPutObjectsUnknownV2() @@ -14782,6 +13832,7 @@ func main() { testGetObjectReadAtFunctional() testGetObjectReadAtWhenEOFWasReached() testPresignedPostPolicy() + testPresignedPostPolicyWrongFile() testCopyObject() testComposeObjectErrorCases() testCompose10KSources() diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/assume_role.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/assume_role.go index d245bc0..cd0a641 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/assume_role.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/assume_role.go @@ -76,7 +76,8 @@ type AssumeRoleResult struct { type STSAssumeRole struct { Expiry - // Required http Client to use when connecting to MinIO STS service. + // Optional http Client to use when connecting to MinIO STS service + // (overrides default client in CredContext) Client *http.Client // STS endpoint to fetch STS credentials. @@ -108,16 +109,10 @@ type STSAssumeRoleOptions struct { // NewSTSAssumeRole returns a pointer to a new // Credentials object wrapping the STSAssumeRole. func NewSTSAssumeRole(stsEndpoint string, opts STSAssumeRoleOptions) (*Credentials, error) { - if stsEndpoint == "" { - return nil, errors.New("STS endpoint cannot be empty") - } if opts.AccessKey == "" || opts.SecretKey == "" { return nil, errors.New("AssumeRole credentials access/secretkey is mandatory") } return New(&STSAssumeRole{ - Client: &http.Client{ - Transport: http.DefaultTransport, - }, STSEndpoint: stsEndpoint, Options: opts, }), nil @@ -222,10 +217,30 @@ func getAssumeRoleCredentials(clnt *http.Client, endpoint string, opts STSAssume return a, nil } -// Retrieve retrieves credentials from the MinIO service. -// Error will be returned if the request fails. -func (m *STSAssumeRole) Retrieve() (Value, error) { - a, err := getAssumeRoleCredentials(m.Client, m.STSEndpoint, m.Options) +// RetrieveWithCredContext retrieves credentials from the MinIO service. +// Error will be returned if the request fails, optional cred context. +func (m *STSAssumeRole) RetrieveWithCredContext(cc *CredContext) (Value, error) { + if cc == nil { + cc = defaultCredContext + } + + client := m.Client + if client == nil { + client = cc.Client + } + if client == nil { + client = defaultCredContext.Client + } + + stsEndpoint := m.STSEndpoint + if stsEndpoint == "" { + stsEndpoint = cc.Endpoint + } + if stsEndpoint == "" { + return Value{}, errors.New("STS endpoint unknown") + } + + a, err := getAssumeRoleCredentials(client, stsEndpoint, m.Options) if err != nil { return Value{}, err } @@ -241,3 +256,9 @@ func (m *STSAssumeRole) Retrieve() (Value, error) { SignerType: SignatureV4, }, nil } + +// Retrieve retrieves credentials from the MinIO service. +// Error will be returned if the request fails. +func (m *STSAssumeRole) Retrieve() (Value, error) { + return m.RetrieveWithCredContext(nil) +} diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/chain.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/chain.go index ddccfb1..5ef3597 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/chain.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/chain.go @@ -55,6 +55,24 @@ func NewChainCredentials(providers []Provider) *Credentials { }) } +// RetrieveWithCredContext is like Retrieve with CredContext +func (c *Chain) RetrieveWithCredContext(cc *CredContext) (Value, error) { + for _, p := range c.Providers { + creds, _ := p.RetrieveWithCredContext(cc) + // Always prioritize non-anonymous providers, if any. + if creds.AccessKeyID == "" && creds.SecretAccessKey == "" { + continue + } + c.curr = p + return creds, nil + } + // At this point we have exhausted all the providers and + // are left without any credentials return anonymous. + return Value{ + SignerType: SignatureAnonymous, + }, nil +} + // Retrieve returns the credentials value, returns no credentials(anonymous) // if no credentials provider returned any value. // diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/credentials.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/credentials.go index 68f9b38..52aff9a 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/credentials.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/credentials.go @@ -18,6 +18,7 @@ package credentials import ( + "net/http" "sync" "time" ) @@ -30,6 +31,10 @@ const ( defaultExpiryWindow = 0.8 ) +// defaultCredContext is used when the credential context doesn't +// actually matter or the default context is suitable. +var defaultCredContext = &CredContext{Client: http.DefaultClient} + // A Value is the S3 credentials value for individual credential fields. type Value struct { // S3 Access key ID @@ -52,8 +57,17 @@ type Value struct { // Value. A provider is required to manage its own Expired state, and what to // be expired means. type Provider interface { + // RetrieveWithCredContext returns nil if it successfully retrieved the + // value. Error is returned if the value were not obtainable, or empty. + // optionally takes CredContext for additional context to retrieve credentials. + RetrieveWithCredContext(cc *CredContext) (Value, error) + // Retrieve returns nil if it successfully retrieved the value. // Error is returned if the value were not obtainable, or empty. + // + // Deprecated: Retrieve() exists for historical compatibility and should not + // be used. To get new credentials use the RetrieveWithCredContext function + // to ensure the proper context (i.e. HTTP client) will be used. Retrieve() (Value, error) // IsExpired returns if the credentials are no longer valid, and need @@ -61,6 +75,18 @@ type Provider interface { IsExpired() bool } +// CredContext is passed to the Retrieve function of a provider to provide +// some additional context to retrieve credentials. +type CredContext struct { + // Client specifies the HTTP client that should be used if an HTTP + // request is to be made to fetch the credentials. + Client *http.Client + + // Endpoint specifies the MinIO endpoint that will be used if no + // explicit endpoint is provided. + Endpoint string +} + // A Expiry provides shared expiration logic to be used by credentials // providers to implement expiry functionality. // @@ -146,16 +172,36 @@ func New(provider Provider) *Credentials { // // If Credentials.Expire() was called the credentials Value will be force // expired, and the next call to Get() will cause them to be refreshed. +// +// Deprecated: Get() exists for historical compatibility and should not be +// used. To get new credentials use the Credentials.GetWithContext function +// to ensure the proper context (i.e. HTTP client) will be used. func (c *Credentials) Get() (Value, error) { + return c.GetWithContext(nil) +} + +// GetWithContext returns the credentials value, or error if the +// credentials Value failed to be retrieved. +// +// Will return the cached credentials Value if it has not expired. If the +// credentials Value has expired the Provider's Retrieve() will be called +// to refresh the credentials. +// +// If Credentials.Expire() was called the credentials Value will be force +// expired, and the next call to Get() will cause them to be refreshed. +func (c *Credentials) GetWithContext(cc *CredContext) (Value, error) { if c == nil { return Value{}, nil } + if cc == nil { + cc = defaultCredContext + } c.Lock() defer c.Unlock() if c.isExpired() { - creds, err := c.provider.Retrieve() + creds, err := c.provider.RetrieveWithCredContext(cc) if err != nil { return Value{}, err } diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_aws.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_aws.go index b6e60d0..21ab0a3 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_aws.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_aws.go @@ -37,8 +37,7 @@ func NewEnvAWS() *Credentials { return New(&EnvAWS{}) } -// Retrieve retrieves the keys from the environment. -func (e *EnvAWS) Retrieve() (Value, error) { +func (e *EnvAWS) retrieve() (Value, error) { e.retrieved = false id := os.Getenv("AWS_ACCESS_KEY_ID") @@ -65,6 +64,16 @@ func (e *EnvAWS) Retrieve() (Value, error) { }, nil } +// Retrieve retrieves the keys from the environment. +func (e *EnvAWS) Retrieve() (Value, error) { + return e.retrieve() +} + +// RetrieveWithCredContext is like Retrieve (no-op input of Cred Context) +func (e *EnvAWS) RetrieveWithCredContext(_ *CredContext) (Value, error) { + return e.retrieve() +} + // IsExpired returns if the credentials have been retrieved. func (e *EnvAWS) IsExpired() bool { return !e.retrieved diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_minio.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_minio.go index 5bfeab1..dbfbdfc 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_minio.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/env_minio.go @@ -38,8 +38,7 @@ func NewEnvMinio() *Credentials { return New(&EnvMinio{}) } -// Retrieve retrieves the keys from the environment. -func (e *EnvMinio) Retrieve() (Value, error) { +func (e *EnvMinio) retrieve() (Value, error) { e.retrieved = false id := os.Getenv("MINIO_ROOT_USER") @@ -62,6 +61,16 @@ func (e *EnvMinio) Retrieve() (Value, error) { }, nil } +// Retrieve retrieves the keys from the environment. +func (e *EnvMinio) Retrieve() (Value, error) { + return e.retrieve() +} + +// RetrieveWithCredContext is like Retrieve() (no-op input cred context) +func (e *EnvMinio) RetrieveWithCredContext(_ *CredContext) (Value, error) { + return e.retrieve() +} + // IsExpired returns if the credentials have been retrieved. func (e *EnvMinio) IsExpired() bool { return !e.retrieved diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go index 541e1a7..0c83fc7 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go @@ -71,9 +71,7 @@ func NewFileAWSCredentials(filename, profile string) *Credentials { }) } -// Retrieve reads and extracts the shared credentials from the current -// users home directory. -func (p *FileAWSCredentials) Retrieve() (Value, error) { +func (p *FileAWSCredentials) retrieve() (Value, error) { if p.Filename == "" { p.Filename = os.Getenv("AWS_SHARED_CREDENTIALS_FILE") if p.Filename == "" { @@ -142,6 +140,17 @@ func (p *FileAWSCredentials) Retrieve() (Value, error) { }, nil } +// Retrieve reads and extracts the shared credentials from the current +// users home directory. +func (p *FileAWSCredentials) Retrieve() (Value, error) { + return p.retrieve() +} + +// RetrieveWithCredContext is like Retrieve(), cred context is no-op for File credentials +func (p *FileAWSCredentials) RetrieveWithCredContext(_ *CredContext) (Value, error) { + return p.retrieve() +} + // loadProfiles loads from the file pointed to by shared credentials filename for profile. // The credentials retrieved from the profile will be returned or error. Error will be // returned if it fails to read from the file, or the data is invalid. diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go index 750e26f..5805281 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go @@ -56,9 +56,7 @@ func NewFileMinioClient(filename, alias string) *Credentials { }) } -// Retrieve reads and extracts the shared credentials from the current -// users home directory. -func (p *FileMinioClient) Retrieve() (Value, error) { +func (p *FileMinioClient) retrieve() (Value, error) { if p.Filename == "" { if value, ok := os.LookupEnv("MINIO_SHARED_CREDENTIALS_FILE"); ok { p.Filename = value @@ -96,6 +94,17 @@ func (p *FileMinioClient) Retrieve() (Value, error) { }, nil } +// Retrieve reads and extracts the shared credentials from the current +// users home directory. +func (p *FileMinioClient) Retrieve() (Value, error) { + return p.retrieve() +} + +// RetrieveWithCredContext - is like Retrieve() +func (p *FileMinioClient) RetrieveWithCredContext(_ *CredContext) (Value, error) { + return p.retrieve() +} + // IsExpired returns if the shared credentials have expired. func (p *FileMinioClient) IsExpired() bool { return !p.retrieved diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go index ea4b3ef..0ba06e7 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go @@ -49,7 +49,8 @@ const DefaultExpiryWindow = -1 type IAM struct { Expiry - // Required http Client to use when connecting to IAM metadata service. + // Optional http Client to use when connecting to IAM metadata service + // (overrides default client in CredContext) Client *http.Client // Custom endpoint to fetch IAM role credentials. @@ -90,17 +91,16 @@ const ( // NewIAM returns a pointer to a new Credentials object wrapping the IAM. func NewIAM(endpoint string) *Credentials { return New(&IAM{ - Client: &http.Client{ - Transport: http.DefaultTransport, - }, Endpoint: endpoint, }) } -// Retrieve retrieves credentials from the EC2 service. -// Error will be returned if the request fails, or unable to extract -// the desired -func (m *IAM) Retrieve() (Value, error) { +// RetrieveWithCredContext is like Retrieve with Cred Context +func (m *IAM) RetrieveWithCredContext(cc *CredContext) (Value, error) { + if cc == nil { + cc = defaultCredContext + } + token := os.Getenv("AWS_CONTAINER_AUTHORIZATION_TOKEN") if token == "" { token = m.Container.AuthorizationToken @@ -144,7 +144,19 @@ func (m *IAM) Retrieve() (Value, error) { var roleCreds ec2RoleCredRespBody var err error + client := m.Client + if client == nil { + client = cc.Client + } + if client == nil { + client = defaultCredContext.Client + } + endpoint := m.Endpoint + if endpoint == "" { + endpoint = cc.Endpoint + } + switch { case identityFile != "": if len(endpoint) == 0 { @@ -160,7 +172,7 @@ func (m *IAM) Retrieve() (Value, error) { } creds := &STSWebIdentity{ - Client: m.Client, + Client: client, STSEndpoint: endpoint, GetWebIDTokenExpiry: func() (*WebIdentityToken, error) { token, err := os.ReadFile(identityFile) @@ -174,7 +186,7 @@ func (m *IAM) Retrieve() (Value, error) { roleSessionName: roleSessionName, } - stsWebIdentityCreds, err := creds.Retrieve() + stsWebIdentityCreds, err := creds.RetrieveWithCredContext(cc) if err == nil { m.SetExpiration(creds.Expiration(), DefaultExpiryWindow) } @@ -185,11 +197,11 @@ func (m *IAM) Retrieve() (Value, error) { endpoint = fmt.Sprintf("%s%s", DefaultECSRoleEndpoint, relativeURI) } - roleCreds, err = getEcsTaskCredentials(m.Client, endpoint, token) + roleCreds, err = getEcsTaskCredentials(client, endpoint, token) case tokenFile != "" && fullURI != "": endpoint = fullURI - roleCreds, err = getEKSPodIdentityCredentials(m.Client, endpoint, tokenFile) + roleCreds, err = getEKSPodIdentityCredentials(client, endpoint, tokenFile) case fullURI != "": if len(endpoint) == 0 { @@ -203,10 +215,10 @@ func (m *IAM) Retrieve() (Value, error) { } } - roleCreds, err = getEcsTaskCredentials(m.Client, endpoint, token) + roleCreds, err = getEcsTaskCredentials(client, endpoint, token) default: - roleCreds, err = getCredentials(m.Client, endpoint) + roleCreds, err = getCredentials(client, endpoint) } if err != nil { @@ -224,6 +236,13 @@ func (m *IAM) Retrieve() (Value, error) { }, nil } +// Retrieve retrieves credentials from the EC2 service. +// Error will be returned if the request fails, or unable to extract +// the desired +func (m *IAM) Retrieve() (Value, error) { + return m.RetrieveWithCredContext(nil) +} + // A ec2RoleCredRespBody provides the shape for unmarshaling credential // request responses. type ec2RoleCredRespBody struct { diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/static.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/static.go index 7dde00b..d90c98c 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/static.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/static.go @@ -59,6 +59,11 @@ func (s *Static) Retrieve() (Value, error) { return s.Value, nil } +// RetrieveWithCredContext returns the static credentials. +func (s *Static) RetrieveWithCredContext(_ *CredContext) (Value, error) { + return s.Retrieve() +} + // IsExpired returns if the credentials are expired. // // For Static, the credentials never expired. diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_client_grants.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_client_grants.go index 62bfbb6..ef6f436 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_client_grants.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_client_grants.go @@ -72,7 +72,8 @@ type ClientGrantsToken struct { type STSClientGrants struct { Expiry - // Required http Client to use when connecting to MinIO STS service. + // Optional http Client to use when connecting to MinIO STS service. + // (overrides default client in CredContext) Client *http.Client // MinIO endpoint to fetch STS credentials. @@ -90,16 +91,10 @@ type STSClientGrants struct { // NewSTSClientGrants returns a pointer to a new // Credentials object wrapping the STSClientGrants. func NewSTSClientGrants(stsEndpoint string, getClientGrantsTokenExpiry func() (*ClientGrantsToken, error)) (*Credentials, error) { - if stsEndpoint == "" { - return nil, errors.New("STS endpoint cannot be empty") - } if getClientGrantsTokenExpiry == nil { return nil, errors.New("Client grants access token and expiry retrieval function should be defined") } return New(&STSClientGrants{ - Client: &http.Client{ - Transport: http.DefaultTransport, - }, STSEndpoint: stsEndpoint, GetClientGrantsTokenExpiry: getClientGrantsTokenExpiry, }), nil @@ -162,10 +157,29 @@ func getClientGrantsCredentials(clnt *http.Client, endpoint string, return a, nil } -// Retrieve retrieves credentials from the MinIO service. -// Error will be returned if the request fails. -func (m *STSClientGrants) Retrieve() (Value, error) { - a, err := getClientGrantsCredentials(m.Client, m.STSEndpoint, m.GetClientGrantsTokenExpiry) +// RetrieveWithCredContext is like Retrieve() with cred context +func (m *STSClientGrants) RetrieveWithCredContext(cc *CredContext) (Value, error) { + if cc == nil { + cc = defaultCredContext + } + + client := m.Client + if client == nil { + client = cc.Client + } + if client == nil { + client = defaultCredContext.Client + } + + stsEndpoint := m.STSEndpoint + if stsEndpoint == "" { + stsEndpoint = cc.Endpoint + } + if stsEndpoint == "" { + return Value{}, errors.New("STS endpoint unknown") + } + + a, err := getClientGrantsCredentials(client, stsEndpoint, m.GetClientGrantsTokenExpiry) if err != nil { return Value{}, err } @@ -181,3 +195,9 @@ func (m *STSClientGrants) Retrieve() (Value, error) { SignerType: SignatureV4, }, nil } + +// Retrieve retrieves credentials from the MinIO service. +// Error will be returned if the request fails. +func (m *STSClientGrants) Retrieve() (Value, error) { + return m.RetrieveWithCredContext(nil) +} diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_custom_identity.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_custom_identity.go index 75e1a77..0021f93 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_custom_identity.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_custom_identity.go @@ -53,6 +53,8 @@ type AssumeRoleWithCustomTokenResponse struct { type CustomTokenIdentity struct { Expiry + // Optional http Client to use when connecting to MinIO STS service. + // (overrides default client in CredContext) Client *http.Client // MinIO server STS endpoint to fetch STS credentials. @@ -69,9 +71,21 @@ type CustomTokenIdentity struct { RequestedExpiry time.Duration } -// Retrieve - to satisfy Provider interface; fetches credentials from MinIO. -func (c *CustomTokenIdentity) Retrieve() (value Value, err error) { - u, err := url.Parse(c.STSEndpoint) +// RetrieveWithCredContext with Retrieve optionally cred context +func (c *CustomTokenIdentity) RetrieveWithCredContext(cc *CredContext) (value Value, err error) { + if cc == nil { + cc = defaultCredContext + } + + stsEndpoint := c.STSEndpoint + if stsEndpoint == "" { + stsEndpoint = cc.Endpoint + } + if stsEndpoint == "" { + return Value{}, errors.New("STS endpoint unknown") + } + + u, err := url.Parse(stsEndpoint) if err != nil { return value, err } @@ -92,7 +106,15 @@ func (c *CustomTokenIdentity) Retrieve() (value Value, err error) { return value, err } - resp, err := c.Client.Do(req) + client := c.Client + if client == nil { + client = cc.Client + } + if client == nil { + client = defaultCredContext.Client + } + + resp, err := client.Do(req) if err != nil { return value, err } @@ -118,11 +140,15 @@ func (c *CustomTokenIdentity) Retrieve() (value Value, err error) { }, nil } +// Retrieve - to satisfy Provider interface; fetches credentials from MinIO. +func (c *CustomTokenIdentity) Retrieve() (value Value, err error) { + return c.RetrieveWithCredContext(nil) +} + // NewCustomTokenCredentials - returns credentials using the // AssumeRoleWithCustomToken STS API. func NewCustomTokenCredentials(stsEndpoint, token, roleArn string, optFuncs ...CustomTokenOpt) (*Credentials, error) { c := CustomTokenIdentity{ - Client: &http.Client{Transport: http.DefaultTransport}, STSEndpoint: stsEndpoint, Token: token, RoleArn: roleArn, diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_ldap_identity.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_ldap_identity.go index b8df289..e63997e 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_ldap_identity.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_ldap_identity.go @@ -20,6 +20,7 @@ package credentials import ( "bytes" "encoding/xml" + "errors" "fmt" "io" "net/http" @@ -55,7 +56,8 @@ type LDAPIdentityResult struct { type LDAPIdentity struct { Expiry - // Required http Client to use when connecting to MinIO STS service. + // Optional http Client to use when connecting to MinIO STS service. + // (overrides default client in CredContext) Client *http.Client // Exported STS endpoint to fetch STS credentials. @@ -77,7 +79,6 @@ type LDAPIdentity struct { // Identity. func NewLDAPIdentity(stsEndpoint, ldapUsername, ldapPassword string, optFuncs ...LDAPIdentityOpt) (*Credentials, error) { l := LDAPIdentity{ - Client: &http.Client{Transport: http.DefaultTransport}, STSEndpoint: stsEndpoint, LDAPUsername: ldapUsername, LDAPPassword: ldapPassword, @@ -113,7 +114,6 @@ func LDAPIdentityExpiryOpt(d time.Duration) LDAPIdentityOpt { // Deprecated: Use the `LDAPIdentityPolicyOpt` with `NewLDAPIdentity` instead. func NewLDAPIdentityWithSessionPolicy(stsEndpoint, ldapUsername, ldapPassword, policy string) (*Credentials, error) { return New(&LDAPIdentity{ - Client: &http.Client{Transport: http.DefaultTransport}, STSEndpoint: stsEndpoint, LDAPUsername: ldapUsername, LDAPPassword: ldapPassword, @@ -121,10 +121,22 @@ func NewLDAPIdentityWithSessionPolicy(stsEndpoint, ldapUsername, ldapPassword, p }), nil } -// Retrieve gets the credential by calling the MinIO STS API for +// RetrieveWithCredContext gets the credential by calling the MinIO STS API for // LDAP on the configured stsEndpoint. -func (k *LDAPIdentity) Retrieve() (value Value, err error) { - u, err := url.Parse(k.STSEndpoint) +func (k *LDAPIdentity) RetrieveWithCredContext(cc *CredContext) (value Value, err error) { + if cc == nil { + cc = defaultCredContext + } + + stsEndpoint := k.STSEndpoint + if stsEndpoint == "" { + stsEndpoint = cc.Endpoint + } + if stsEndpoint == "" { + return Value{}, errors.New("STS endpoint unknown") + } + + u, err := url.Parse(stsEndpoint) if err != nil { return value, err } @@ -148,7 +160,15 @@ func (k *LDAPIdentity) Retrieve() (value Value, err error) { req.Header.Set("Content-Type", "application/x-www-form-urlencoded") - resp, err := k.Client.Do(req) + client := k.Client + if client == nil { + client = cc.Client + } + if client == nil { + client = defaultCredContext.Client + } + + resp, err := client.Do(req) if err != nil { return value, err } @@ -188,3 +208,9 @@ func (k *LDAPIdentity) Retrieve() (value Value, err error) { SignerType: SignatureV4, }, nil } + +// Retrieve gets the credential by calling the MinIO STS API for +// LDAP on the configured stsEndpoint. +func (k *LDAPIdentity) Retrieve() (value Value, err error) { + return k.RetrieveWithCredContext(defaultCredContext) +} diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go index 1008350..c904bbe 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go @@ -20,8 +20,8 @@ import ( "crypto/tls" "encoding/xml" "errors" + "fmt" "io" - "net" "net/http" "net/url" "strconv" @@ -36,7 +36,12 @@ type CertificateIdentityOption func(*STSCertificateIdentity) // CertificateIdentityWithTransport returns a CertificateIdentityOption that // customizes the STSCertificateIdentity with the given http.RoundTripper. func CertificateIdentityWithTransport(t http.RoundTripper) CertificateIdentityOption { - return CertificateIdentityOption(func(i *STSCertificateIdentity) { i.Client.Transport = t }) + return CertificateIdentityOption(func(i *STSCertificateIdentity) { + if i.Client == nil { + i.Client = &http.Client{} + } + i.Client.Transport = t + }) } // CertificateIdentityWithExpiry returns a CertificateIdentityOption that @@ -53,6 +58,10 @@ func CertificateIdentityWithExpiry(livetime time.Duration) CertificateIdentityOp type STSCertificateIdentity struct { Expiry + // Optional http Client to use when connecting to MinIO STS service. + // (overrides default client in CredContext) + Client *http.Client + // STSEndpoint is the base URL endpoint of the STS API. // For example, https://minio.local:9000 STSEndpoint string @@ -68,50 +77,18 @@ type STSCertificateIdentity struct { // The default livetime is one hour. S3CredentialLivetime time.Duration - // Client is the HTTP client used to authenticate and fetch - // S3 credentials. - // - // A custom TLS client configuration can be specified by - // using a custom http.Transport: - // Client: http.Client { - // Transport: &http.Transport{ - // TLSClientConfig: &tls.Config{}, - // }, - // } - Client http.Client + // Certificate is the client certificate that is used for + // STS authentication. + Certificate tls.Certificate } -var _ Provider = (*STSWebIdentity)(nil) // compiler check - // NewSTSCertificateIdentity returns a STSCertificateIdentity that authenticates // to the given STS endpoint with the given TLS certificate and retrieves and // rotates S3 credentials. func NewSTSCertificateIdentity(endpoint string, certificate tls.Certificate, options ...CertificateIdentityOption) (*Credentials, error) { - if endpoint == "" { - return nil, errors.New("STS endpoint cannot be empty") - } - if _, err := url.Parse(endpoint); err != nil { - return nil, err - } identity := &STSCertificateIdentity{ STSEndpoint: endpoint, - Client: http.Client{ - Transport: &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, - ForceAttemptHTTP2: true, - MaxIdleConns: 100, - IdleConnTimeout: 90 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 5 * time.Second, - TLSClientConfig: &tls.Config{ - Certificates: []tls.Certificate{certificate}, - }, - }, - }, + Certificate: certificate, } for _, option := range options { option(identity) @@ -119,10 +96,21 @@ func NewSTSCertificateIdentity(endpoint string, certificate tls.Certificate, opt return New(identity), nil } -// Retrieve fetches a new set of S3 credentials from the configured -// STS API endpoint. -func (i *STSCertificateIdentity) Retrieve() (Value, error) { - endpointURL, err := url.Parse(i.STSEndpoint) +// RetrieveWithCredContext is Retrieve with cred context +func (i *STSCertificateIdentity) RetrieveWithCredContext(cc *CredContext) (Value, error) { + if cc == nil { + cc = defaultCredContext + } + + stsEndpoint := i.STSEndpoint + if stsEndpoint == "" { + stsEndpoint = cc.Endpoint + } + if stsEndpoint == "" { + return Value{}, errors.New("STS endpoint unknown") + } + + endpointURL, err := url.Parse(stsEndpoint) if err != nil { return Value{}, err } @@ -145,7 +133,28 @@ func (i *STSCertificateIdentity) Retrieve() (Value, error) { } req.Form.Add("DurationSeconds", strconv.FormatUint(uint64(livetime.Seconds()), 10)) - resp, err := i.Client.Do(req) + client := i.Client + if client == nil { + client = cc.Client + } + if client == nil { + client = defaultCredContext.Client + } + + tr, ok := client.Transport.(*http.Transport) + if !ok { + return Value{}, fmt.Errorf("CredContext should contain an http.Transport value") + } + + // Clone the HTTP transport (patch the TLS client certificate) + trCopy := tr.Clone() + trCopy.TLSClientConfig.Certificates = []tls.Certificate{i.Certificate} + + // Clone the HTTP client (patch the HTTP transport) + clientCopy := *client + clientCopy.Transport = trCopy + + resp, err := clientCopy.Do(req) if err != nil { return Value{}, err } @@ -193,6 +202,11 @@ func (i *STSCertificateIdentity) Retrieve() (Value, error) { }, nil } +// Retrieve fetches a new set of S3 credentials from the configured STS API endpoint. +func (i *STSCertificateIdentity) Retrieve() (Value, error) { + return i.RetrieveWithCredContext(defaultCredContext) +} + // Expiration returns the expiration time of the current S3 credentials. func (i *STSCertificateIdentity) Expiration() time.Time { return i.expiration } diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go index 596d951..2352588 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go @@ -25,6 +25,7 @@ import ( "io" "net/http" "net/url" + "os" "strconv" "strings" "time" @@ -57,9 +58,10 @@ type WebIdentityResult struct { // WebIdentityToken - web identity token with expiry. type WebIdentityToken struct { - Token string - AccessToken string - Expiry int + Token string + AccessToken string + RefreshToken string + Expiry int } // A STSWebIdentity retrieves credentials from MinIO service, and keeps track if @@ -67,7 +69,8 @@ type WebIdentityToken struct { type STSWebIdentity struct { Expiry - // Required http Client to use when connecting to MinIO STS service. + // Optional http Client to use when connecting to MinIO STS service. + // (overrides default client in CredContext) Client *http.Client // Exported STS endpoint to fetch STS credentials. @@ -85,29 +88,53 @@ type STSWebIdentity struct { // assuming. RoleARN string + // Policy is the policy where the credentials should be limited too. + Policy string + // roleSessionName is the identifier for the assumed role session. roleSessionName string } // NewSTSWebIdentity returns a pointer to a new // Credentials object wrapping the STSWebIdentity. -func NewSTSWebIdentity(stsEndpoint string, getWebIDTokenExpiry func() (*WebIdentityToken, error)) (*Credentials, error) { - if stsEndpoint == "" { - return nil, errors.New("STS endpoint cannot be empty") - } +func NewSTSWebIdentity(stsEndpoint string, getWebIDTokenExpiry func() (*WebIdentityToken, error), opts ...func(*STSWebIdentity)) (*Credentials, error) { if getWebIDTokenExpiry == nil { return nil, errors.New("Web ID token and expiry retrieval function should be defined") } - return New(&STSWebIdentity{ - Client: &http.Client{ - Transport: http.DefaultTransport, - }, + i := &STSWebIdentity{ STSEndpoint: stsEndpoint, GetWebIDTokenExpiry: getWebIDTokenExpiry, - }), nil + } + for _, o := range opts { + o(i) + } + return New(i), nil } -func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSessionName string, +// NewKubernetesIdentity returns a pointer to a new +// Credentials object using the Kubernetes service account +func NewKubernetesIdentity(stsEndpoint string, opts ...func(*STSWebIdentity)) (*Credentials, error) { + return NewSTSWebIdentity(stsEndpoint, func() (*WebIdentityToken, error) { + token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token") + if err != nil { + return nil, err + } + + return &WebIdentityToken{ + Token: string(token), + }, nil + }, opts...) +} + +// WithPolicy option will enforce that the returned credentials +// will be scoped down to the specified policy +func WithPolicy(policy string) func(*STSWebIdentity) { + return func(i *STSWebIdentity) { + i.Policy = policy + } +} + +func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSessionName string, policy string, getWebIDTokenExpiry func() (*WebIdentityToken, error), ) (AssumeRoleWithWebIdentityResponse, error) { idToken, err := getWebIDTokenExpiry() @@ -130,9 +157,16 @@ func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSession // Usually set when server is using extended userInfo endpoint. v.Set("WebIdentityAccessToken", idToken.AccessToken) } + if idToken.RefreshToken != "" { + // Usually set when server is using extended userInfo endpoint. + v.Set("WebIdentityRefreshToken", idToken.RefreshToken) + } if idToken.Expiry > 0 { v.Set("DurationSeconds", fmt.Sprintf("%d", idToken.Expiry)) } + if policy != "" { + v.Set("Policy", policy) + } v.Set("Version", STSVersion) u, err := url.Parse(endpoint) @@ -180,10 +214,29 @@ func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSession return a, nil } -// Retrieve retrieves credentials from the MinIO service. -// Error will be returned if the request fails. -func (m *STSWebIdentity) Retrieve() (Value, error) { - a, err := getWebIdentityCredentials(m.Client, m.STSEndpoint, m.RoleARN, m.roleSessionName, m.GetWebIDTokenExpiry) +// RetrieveWithCredContext is like Retrieve with optional cred context. +func (m *STSWebIdentity) RetrieveWithCredContext(cc *CredContext) (Value, error) { + if cc == nil { + cc = defaultCredContext + } + + client := m.Client + if client == nil { + client = cc.Client + } + if client == nil { + client = defaultCredContext.Client + } + + stsEndpoint := m.STSEndpoint + if stsEndpoint == "" { + stsEndpoint = cc.Endpoint + } + if stsEndpoint == "" { + return Value{}, errors.New("STS endpoint unknown") + } + + a, err := getWebIdentityCredentials(client, stsEndpoint, m.RoleARN, m.roleSessionName, m.Policy, m.GetWebIDTokenExpiry) if err != nil { return Value{}, err } @@ -200,6 +253,12 @@ func (m *STSWebIdentity) Retrieve() (Value, error) { }, nil } +// Retrieve retrieves credentials from the MinIO service. +// Error will be returned if the request fails. +func (m *STSWebIdentity) Retrieve() (Value, error) { + return m.RetrieveWithCredContext(nil) +} + // Expiration returns the expiration time of the credentials func (m *STSWebIdentity) Expiration() time.Time { return m.expiration diff --git a/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go b/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go index e706b57..344af2b 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go @@ -434,12 +434,34 @@ func (de DelMarkerExpiration) MarshalXML(enc *xml.Encoder, start xml.StartElemen return enc.EncodeElement(delMarkerExp(de), start) } +// AllVersionsExpiration represents AllVersionsExpiration actions element in an ILM policy +type AllVersionsExpiration struct { + XMLName xml.Name `xml:"AllVersionsExpiration" json:"-"` + Days int `xml:"Days,omitempty" json:"Days,omitempty"` + DeleteMarker ExpireDeleteMarker `xml:"DeleteMarker,omitempty" json:"DeleteMarker,omitempty"` +} + +// IsNull returns true if days field is 0 +func (e AllVersionsExpiration) IsNull() bool { + return e.Days == 0 +} + +// MarshalXML satisfies xml.Marshaler to provide custom encoding +func (e AllVersionsExpiration) MarshalXML(enc *xml.Encoder, start xml.StartElement) error { + if e.IsNull() { + return nil + } + type allVersionsExp AllVersionsExpiration + return enc.EncodeElement(allVersionsExp(e), start) +} + // MarshalJSON customizes json encoding by omitting empty values func (r Rule) MarshalJSON() ([]byte, error) { type rule struct { AbortIncompleteMultipartUpload *AbortIncompleteMultipartUpload `json:"AbortIncompleteMultipartUpload,omitempty"` Expiration *Expiration `json:"Expiration,omitempty"` DelMarkerExpiration *DelMarkerExpiration `json:"DelMarkerExpiration,omitempty"` + AllVersionsExpiration *AllVersionsExpiration `json:"AllVersionsExpiration,omitempty"` ID string `json:"ID"` RuleFilter *Filter `json:"Filter,omitempty"` NoncurrentVersionExpiration *NoncurrentVersionExpiration `json:"NoncurrentVersionExpiration,omitempty"` @@ -475,6 +497,9 @@ func (r Rule) MarshalJSON() ([]byte, error) { if !r.NoncurrentVersionTransition.isNull() { newr.NoncurrentVersionTransition = &r.NoncurrentVersionTransition } + if !r.AllVersionsExpiration.IsNull() { + newr.AllVersionsExpiration = &r.AllVersionsExpiration + } return json.Marshal(newr) } @@ -485,6 +510,7 @@ type Rule struct { AbortIncompleteMultipartUpload AbortIncompleteMultipartUpload `xml:"AbortIncompleteMultipartUpload,omitempty" json:"AbortIncompleteMultipartUpload,omitempty"` Expiration Expiration `xml:"Expiration,omitempty" json:"Expiration,omitempty"` DelMarkerExpiration DelMarkerExpiration `xml:"DelMarkerExpiration,omitempty" json:"DelMarkerExpiration,omitempty"` + AllVersionsExpiration AllVersionsExpiration `xml:"AllVersionsExpiration,omitempty" json:"AllVersionsExpiration,omitempty"` ID string `xml:"ID" json:"ID"` RuleFilter Filter `xml:"Filter,omitempty" json:"Filter,omitempty"` NoncurrentVersionExpiration NoncurrentVersionExpiration `xml:"NoncurrentVersionExpiration,omitempty" json:"NoncurrentVersionExpiration,omitempty"` diff --git a/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go b/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go index 7a84a6f..33465c6 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go @@ -69,7 +69,7 @@ const ( // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions // borrowed from this article and also testing various ASCII characters following regex // is supported by AWS S3 for both tags and values. -var validTagKeyValue = regexp.MustCompile(`^[a-zA-Z0-9-+\-._:/@ ]+$`) +var validTagKeyValue = regexp.MustCompile(`^[a-zA-Z0-9-+\-._:/@ =]+$`) func checkKey(key string) error { if len(key) == 0 { diff --git a/vendor/github.com/minio/minio-go/v7/post-policy.go b/vendor/github.com/minio/minio-go/v7/post-policy.go index 3f02370..26bf441 100644 --- a/vendor/github.com/minio/minio-go/v7/post-policy.go +++ b/vendor/github.com/minio/minio-go/v7/post-policy.go @@ -85,7 +85,7 @@ func (p *PostPolicy) SetExpires(t time.Time) error { // SetKey - Sets an object name for the policy based upload. func (p *PostPolicy) SetKey(key string) error { - if strings.TrimSpace(key) == "" || key == "" { + if strings.TrimSpace(key) == "" { return errInvalidArgument("Object name is empty.") } policyCond := policyCondition{ @@ -118,7 +118,7 @@ func (p *PostPolicy) SetKeyStartsWith(keyStartsWith string) error { // SetBucket - Sets bucket at which objects will be uploaded to. func (p *PostPolicy) SetBucket(bucketName string) error { - if strings.TrimSpace(bucketName) == "" || bucketName == "" { + if strings.TrimSpace(bucketName) == "" { return errInvalidArgument("Bucket name is empty.") } policyCond := policyCondition{ @@ -135,7 +135,7 @@ func (p *PostPolicy) SetBucket(bucketName string) error { // SetCondition - Sets condition for credentials, date and algorithm func (p *PostPolicy) SetCondition(matchType, condition, value string) error { - if strings.TrimSpace(value) == "" || value == "" { + if strings.TrimSpace(value) == "" { return errInvalidArgument("No value specified for condition") } @@ -156,7 +156,7 @@ func (p *PostPolicy) SetCondition(matchType, condition, value string) error { // SetTagging - Sets tagging for the object for this policy based upload. func (p *PostPolicy) SetTagging(tagging string) error { - if strings.TrimSpace(tagging) == "" || tagging == "" { + if strings.TrimSpace(tagging) == "" { return errInvalidArgument("No tagging specified.") } _, err := tags.ParseObjectXML(strings.NewReader(tagging)) @@ -178,7 +178,7 @@ func (p *PostPolicy) SetTagging(tagging string) error { // SetContentType - Sets content-type of the object for this policy // based upload. func (p *PostPolicy) SetContentType(contentType string) error { - if strings.TrimSpace(contentType) == "" || contentType == "" { + if strings.TrimSpace(contentType) == "" { return errInvalidArgument("No content type specified.") } policyCond := policyCondition{ @@ -211,7 +211,7 @@ func (p *PostPolicy) SetContentTypeStartsWith(contentTypeStartsWith string) erro // SetContentDisposition - Sets content-disposition of the object for this policy func (p *PostPolicy) SetContentDisposition(contentDisposition string) error { - if strings.TrimSpace(contentDisposition) == "" || contentDisposition == "" { + if strings.TrimSpace(contentDisposition) == "" { return errInvalidArgument("No content disposition specified.") } policyCond := policyCondition{ @@ -226,27 +226,44 @@ func (p *PostPolicy) SetContentDisposition(contentDisposition string) error { return nil } +// SetContentEncoding - Sets content-encoding of the object for this policy +func (p *PostPolicy) SetContentEncoding(contentEncoding string) error { + if strings.TrimSpace(contentEncoding) == "" { + return errInvalidArgument("No content encoding specified.") + } + policyCond := policyCondition{ + matchType: "eq", + condition: "$Content-Encoding", + value: contentEncoding, + } + if err := p.addNewPolicy(policyCond); err != nil { + return err + } + p.formData["Content-Encoding"] = contentEncoding + return nil +} + // SetContentLengthRange - Set new min and max content length // condition for all incoming uploads. -func (p *PostPolicy) SetContentLengthRange(min, max int64) error { - if min > max { +func (p *PostPolicy) SetContentLengthRange(minLen, maxLen int64) error { + if minLen > maxLen { return errInvalidArgument("Minimum limit is larger than maximum limit.") } - if min < 0 { + if minLen < 0 { return errInvalidArgument("Minimum limit cannot be negative.") } - if max <= 0 { + if maxLen <= 0 { return errInvalidArgument("Maximum limit cannot be non-positive.") } - p.contentLengthRange.min = min - p.contentLengthRange.max = max + p.contentLengthRange.min = minLen + p.contentLengthRange.max = maxLen return nil } // SetSuccessActionRedirect - Sets the redirect success url of the object for this policy // based upload. func (p *PostPolicy) SetSuccessActionRedirect(redirect string) error { - if strings.TrimSpace(redirect) == "" || redirect == "" { + if strings.TrimSpace(redirect) == "" { return errInvalidArgument("Redirect is empty") } policyCond := policyCondition{ @@ -264,7 +281,7 @@ func (p *PostPolicy) SetSuccessActionRedirect(redirect string) error { // SetSuccessStatusAction - Sets the status success code of the object for this policy // based upload. func (p *PostPolicy) SetSuccessStatusAction(status string) error { - if strings.TrimSpace(status) == "" || status == "" { + if strings.TrimSpace(status) == "" { return errInvalidArgument("Status is empty") } policyCond := policyCondition{ @@ -282,10 +299,10 @@ func (p *PostPolicy) SetSuccessStatusAction(status string) error { // SetUserMetadata - Set user metadata as a key/value couple. // Can be retrieved through a HEAD request or an event. func (p *PostPolicy) SetUserMetadata(key, value string) error { - if strings.TrimSpace(key) == "" || key == "" { + if strings.TrimSpace(key) == "" { return errInvalidArgument("Key is empty") } - if strings.TrimSpace(value) == "" || value == "" { + if strings.TrimSpace(value) == "" { return errInvalidArgument("Value is empty") } headerName := fmt.Sprintf("x-amz-meta-%s", key) @@ -301,12 +318,49 @@ func (p *PostPolicy) SetUserMetadata(key, value string) error { return nil } +// SetUserMetadataStartsWith - Set how an user metadata should starts with. +// Can be retrieved through a HEAD request or an event. +func (p *PostPolicy) SetUserMetadataStartsWith(key, value string) error { + if strings.TrimSpace(key) == "" { + return errInvalidArgument("Key is empty") + } + headerName := fmt.Sprintf("x-amz-meta-%s", key) + policyCond := policyCondition{ + matchType: "starts-with", + condition: fmt.Sprintf("$%s", headerName), + value: value, + } + if err := p.addNewPolicy(policyCond); err != nil { + return err + } + p.formData[headerName] = value + return nil +} + // SetChecksum sets the checksum of the request. -func (p *PostPolicy) SetChecksum(c Checksum) { +func (p *PostPolicy) SetChecksum(c Checksum) error { if c.IsSet() { p.formData[amzChecksumAlgo] = c.Type.String() p.formData[c.Type.Key()] = c.Encoded() + + policyCond := policyCondition{ + matchType: "eq", + condition: fmt.Sprintf("$%s", amzChecksumAlgo), + value: c.Type.String(), + } + if err := p.addNewPolicy(policyCond); err != nil { + return err + } + policyCond = policyCondition{ + matchType: "eq", + condition: fmt.Sprintf("$%s", c.Type.Key()), + value: c.Encoded(), + } + if err := p.addNewPolicy(policyCond); err != nil { + return err + } } + return nil } // SetEncryption - sets encryption headers for POST API diff --git a/vendor/github.com/minio/minio-go/v7/retry-continous.go b/vendor/github.com/minio/minio-go/v7/retry-continous.go index bfeea95..81fcf16 100644 --- a/vendor/github.com/minio/minio-go/v7/retry-continous.go +++ b/vendor/github.com/minio/minio-go/v7/retry-continous.go @@ -20,7 +20,7 @@ package minio import "time" // newRetryTimerContinous creates a timer with exponentially increasing delays forever. -func (c *Client) newRetryTimerContinous(unit, cap time.Duration, jitter float64, doneCh chan struct{}) <-chan int { +func (c *Client) newRetryTimerContinous(baseSleep, maxSleep time.Duration, jitter float64, doneCh chan struct{}) <-chan int { attemptCh := make(chan int) // normalize jitter to the range [0, 1.0] @@ -39,10 +39,10 @@ func (c *Client) newRetryTimerContinous(unit, cap time.Duration, jitter float64, if attempt > maxAttempt { attempt = maxAttempt } - // sleep = random_between(0, min(cap, base * 2 ** attempt)) - sleep := unit * time.Duration(1< cap { - sleep = cap + // sleep = random_between(0, min(maxSleep, base * 2 ** attempt)) + sleep := baseSleep * time.Duration(1< maxSleep { + sleep = maxSleep } if jitter != NoJitter { sleep -= time.Duration(c.random.Float64() * float64(sleep) * jitter) diff --git a/vendor/github.com/minio/minio-go/v7/retry.go b/vendor/github.com/minio/minio-go/v7/retry.go index 5ddcad8..ed95401 100644 --- a/vendor/github.com/minio/minio-go/v7/retry.go +++ b/vendor/github.com/minio/minio-go/v7/retry.go @@ -45,7 +45,7 @@ var DefaultRetryCap = time.Second // newRetryTimer creates a timer with exponentially increasing // delays until the maximum retry attempts are reached. -func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, unit, cap time.Duration, jitter float64) <-chan int { +func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, baseSleep, maxSleep time.Duration, jitter float64) <-chan int { attemptCh := make(chan int) // computes the exponential backoff duration according to @@ -59,10 +59,10 @@ func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, unit, cap time jitter = MaxJitter } - // sleep = random_between(0, min(cap, base * 2 ** attempt)) - sleep := unit * time.Duration(1< cap { - sleep = cap + // sleep = random_between(0, min(maxSleep, base * 2 ** attempt)) + sleep := baseSleep * time.Duration(1< maxSleep { + sleep = maxSleep } if jitter != NoJitter { sleep -= time.Duration(c.random.Float64() * float64(sleep) * jitter) @@ -112,6 +112,7 @@ func isS3CodeRetryable(s3Code string) (ok bool) { // List of HTTP status codes which are retryable. var retryableHTTPStatusCodes = map[int]struct{}{ + http.StatusRequestTimeout: {}, 429: {}, // http.StatusTooManyRequests is not part of the Go 1.5 library, yet 499: {}, // client closed request, retry. A non-standard status code introduced by nginx. http.StatusInternalServerError: {}, @@ -129,9 +130,10 @@ func isHTTPStatusRetryable(httpStatusCode int) (ok bool) { } // For now, all http Do() requests are retriable except some well defined errors -func isRequestErrorRetryable(err error) bool { +func isRequestErrorRetryable(ctx context.Context, err error) bool { if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { - return false + // Retry if internal timeout in the HTTP call. + return ctx.Err() == nil } if ue, ok := err.(*url.Error); ok { e := ue.Unwrap() diff --git a/vendor/github.com/minio/minio-go/v7/utils.go b/vendor/github.com/minio/minio-go/v7/utils.go index a5beb37..cd7d2c2 100644 --- a/vendor/github.com/minio/minio-go/v7/utils.go +++ b/vendor/github.com/minio/minio-go/v7/utils.go @@ -378,10 +378,11 @@ func ToObjectInfo(bucketName, objectName string, h http.Header) (ObjectInfo, err Restore: restore, // Checksum values - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), }, nil } @@ -698,3 +699,146 @@ func (h *hashReaderWrapper) Read(p []byte) (n int, err error) { } return n, err } + +// Following is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration. +// Used uint for unsigned long. Used uint32 for input arguments in order to match +// the Go hash/crc32 package. zlib CRC32 combine (https://github.com/madler/zlib) +// Modified for hash/crc64 by Klaus Post, 2024. +func gf2MatrixTimes(mat []uint64, vec uint64) uint64 { + var sum uint64 + + for vec != 0 { + if vec&1 != 0 { + sum ^= mat[0] + } + vec >>= 1 + mat = mat[1:] + } + return sum +} + +func gf2MatrixSquare(square, mat []uint64) { + if len(square) != len(mat) { + panic("square matrix size mismatch") + } + for n := range mat { + square[n] = gf2MatrixTimes(mat, mat[n]) + } +} + +// crc32Combine returns the combined CRC-32 hash value of the two passed CRC-32 +// hash values crc1 and crc2. poly represents the generator polynomial +// and len2 specifies the byte length that the crc2 hash covers. +func crc32Combine(poly uint32, crc1, crc2 uint32, len2 int64) uint32 { + // degenerate case (also disallow negative lengths) + if len2 <= 0 { + return crc1 + } + + even := make([]uint64, 32) // even-power-of-two zeros operator + odd := make([]uint64, 32) // odd-power-of-two zeros operator + + // put operator for one zero bit in odd + odd[0] = uint64(poly) // CRC-32 polynomial + row := uint64(1) + for n := 1; n < 32; n++ { + odd[n] = row + row <<= 1 + } + + // put operator for two zero bits in even + gf2MatrixSquare(even, odd) + + // put operator for four zero bits in odd + gf2MatrixSquare(odd, even) + + // apply len2 zeros to crc1 (first square will put the operator for one + // zero byte, eight zero bits, in even) + crc1n := uint64(crc1) + for { + // apply zeros operator for this bit of len2 + gf2MatrixSquare(even, odd) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(even, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + + // another iteration of the loop with odd and even swapped + gf2MatrixSquare(odd, even) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(odd, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + } + + // return combined crc + crc1n ^= uint64(crc2) + return uint32(crc1n) +} + +func crc64Combine(poly uint64, crc1, crc2 uint64, len2 int64) uint64 { + // degenerate case (also disallow negative lengths) + if len2 <= 0 { + return crc1 + } + + even := make([]uint64, 64) // even-power-of-two zeros operator + odd := make([]uint64, 64) // odd-power-of-two zeros operator + + // put operator for one zero bit in odd + odd[0] = poly // CRC-64 polynomial + row := uint64(1) + for n := 1; n < 64; n++ { + odd[n] = row + row <<= 1 + } + + // put operator for two zero bits in even + gf2MatrixSquare(even, odd) + + // put operator for four zero bits in odd + gf2MatrixSquare(odd, even) + + // apply len2 zeros to crc1 (first square will put the operator for one + // zero byte, eight zero bits, in even) + crc1n := crc1 + for { + // apply zeros operator for this bit of len2 + gf2MatrixSquare(even, odd) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(even, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + + // another iteration of the loop with odd and even swapped + gf2MatrixSquare(odd, even) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(odd, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + } + + // return combined crc + crc1n ^= crc2 + return crc1n +} diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s index 6713acc..c389547 100644 --- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s +++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s @@ -1,243 +1,2791 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \ - MOVO v0, t0; \ - PMULULQ v2, t0; \ - PADDQ v2, v0; \ - PADDQ t0, v0; \ - PADDQ t0, v0; \ - PXOR v0, v6; \ - PSHUFD $0xB1, v6, v6; \ - MOVO v4, t0; \ - PMULULQ v6, t0; \ - PADDQ v6, v4; \ - PADDQ t0, v4; \ - PADDQ t0, v4; \ - PXOR v4, v2; \ - PSHUFB c40, v2; \ - MOVO v0, t0; \ - PMULULQ v2, t0; \ - PADDQ v2, v0; \ - PADDQ t0, v0; \ - PADDQ t0, v0; \ - PXOR v0, v6; \ - PSHUFB c48, v6; \ - MOVO v4, t0; \ - PMULULQ v6, t0; \ - PADDQ v6, v4; \ - PADDQ t0, v4; \ - PADDQ t0, v4; \ - PXOR v4, v2; \ - MOVO v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVO v1, t0; \ - PMULULQ v3, t0; \ - PADDQ v3, v1; \ - PADDQ t0, v1; \ - PADDQ t0, v1; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v7, v7; \ - MOVO v5, t0; \ - PMULULQ v7, t0; \ - PADDQ v7, v5; \ - PADDQ t0, v5; \ - PADDQ t0, v5; \ - PXOR v5, v3; \ - PSHUFB c40, v3; \ - MOVO v1, t0; \ - PMULULQ v3, t0; \ - PADDQ v3, v1; \ - PADDQ t0, v1; \ - PADDQ t0, v1; \ - PXOR v1, v7; \ - PSHUFB c48, v7; \ - MOVO v5, t0; \ - PMULULQ v7, t0; \ - PADDQ v7, v5; \ - PADDQ t0, v5; \ - PADDQ t0, v5; \ - PXOR v5, v3; \ - MOVO v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG_0(block, off) \ - MOVOU 8*(off+0)(block), X0; \ - MOVOU 8*(off+2)(block), X1; \ - MOVOU 8*(off+4)(block), X2; \ - MOVOU 8*(off+6)(block), X3; \ - MOVOU 8*(off+8)(block), X4; \ - MOVOU 8*(off+10)(block), X5; \ - MOVOU 8*(off+12)(block), X6; \ - MOVOU 8*(off+14)(block), X7 - -#define STORE_MSG_0(block, off) \ - MOVOU X0, 8*(off+0)(block); \ - MOVOU X1, 8*(off+2)(block); \ - MOVOU X2, 8*(off+4)(block); \ - MOVOU X3, 8*(off+6)(block); \ - MOVOU X4, 8*(off+8)(block); \ - MOVOU X5, 8*(off+10)(block); \ - MOVOU X6, 8*(off+12)(block); \ - MOVOU X7, 8*(off+14)(block) - -#define LOAD_MSG_1(block, off) \ - MOVOU 8*off+0*8(block), X0; \ - MOVOU 8*off+16*8(block), X1; \ - MOVOU 8*off+32*8(block), X2; \ - MOVOU 8*off+48*8(block), X3; \ - MOVOU 8*off+64*8(block), X4; \ - MOVOU 8*off+80*8(block), X5; \ - MOVOU 8*off+96*8(block), X6; \ - MOVOU 8*off+112*8(block), X7 - -#define STORE_MSG_1(block, off) \ - MOVOU X0, 8*off+0*8(block); \ - MOVOU X1, 8*off+16*8(block); \ - MOVOU X2, 8*off+32*8(block); \ - MOVOU X3, 8*off+48*8(block); \ - MOVOU X4, 8*off+64*8(block); \ - MOVOU X5, 8*off+80*8(block); \ - MOVOU X6, 8*off+96*8(block); \ - MOVOU X7, 8*off+112*8(block) - -#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \ - LOAD_MSG_0(block, off); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ - STORE_MSG_0(block, off) - -#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \ - LOAD_MSG_1(block, off); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ - STORE_MSG_1(block, off) - // func blamkaSSE4(b *block) -TEXT ·blamkaSSE4(SB), 4, $0-8 - MOVQ b+0(FP), AX - - MOVOU ·c40<>(SB), X10 - MOVOU ·c48<>(SB), X11 +// Requires: SSE2, SSSE3 +TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8 + MOVQ b+0(FP), AX + MOVOU ·c40<>+0(SB), X10 + MOVOU ·c48<>+0(SB), X11 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU 64(AX), X4 + MOVOU 80(AX), X5 + MOVOU 96(AX), X6 + MOVOU 112(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, 32(AX) + MOVOU X3, 48(AX) + MOVOU X4, 64(AX) + MOVOU X5, 80(AX) + MOVOU X6, 96(AX) + MOVOU X7, 112(AX) + MOVOU 128(AX), X0 + MOVOU 144(AX), X1 + MOVOU 160(AX), X2 + MOVOU 176(AX), X3 + MOVOU 192(AX), X4 + MOVOU 208(AX), X5 + MOVOU 224(AX), X6 + MOVOU 240(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 128(AX) + MOVOU X1, 144(AX) + MOVOU X2, 160(AX) + MOVOU X3, 176(AX) + MOVOU X4, 192(AX) + MOVOU X5, 208(AX) + MOVOU X6, 224(AX) + MOVOU X7, 240(AX) + MOVOU 256(AX), X0 + MOVOU 272(AX), X1 + MOVOU 288(AX), X2 + MOVOU 304(AX), X3 + MOVOU 320(AX), X4 + MOVOU 336(AX), X5 + MOVOU 352(AX), X6 + MOVOU 368(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 256(AX) + MOVOU X1, 272(AX) + MOVOU X2, 288(AX) + MOVOU X3, 304(AX) + MOVOU X4, 320(AX) + MOVOU X5, 336(AX) + MOVOU X6, 352(AX) + MOVOU X7, 368(AX) + MOVOU 384(AX), X0 + MOVOU 400(AX), X1 + MOVOU 416(AX), X2 + MOVOU 432(AX), X3 + MOVOU 448(AX), X4 + MOVOU 464(AX), X5 + MOVOU 480(AX), X6 + MOVOU 496(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 384(AX) + MOVOU X1, 400(AX) + MOVOU X2, 416(AX) + MOVOU X3, 432(AX) + MOVOU X4, 448(AX) + MOVOU X5, 464(AX) + MOVOU X6, 480(AX) + MOVOU X7, 496(AX) + MOVOU 512(AX), X0 + MOVOU 528(AX), X1 + MOVOU 544(AX), X2 + MOVOU 560(AX), X3 + MOVOU 576(AX), X4 + MOVOU 592(AX), X5 + MOVOU 608(AX), X6 + MOVOU 624(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 512(AX) + MOVOU X1, 528(AX) + MOVOU X2, 544(AX) + MOVOU X3, 560(AX) + MOVOU X4, 576(AX) + MOVOU X5, 592(AX) + MOVOU X6, 608(AX) + MOVOU X7, 624(AX) + MOVOU 640(AX), X0 + MOVOU 656(AX), X1 + MOVOU 672(AX), X2 + MOVOU 688(AX), X3 + MOVOU 704(AX), X4 + MOVOU 720(AX), X5 + MOVOU 736(AX), X6 + MOVOU 752(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 640(AX) + MOVOU X1, 656(AX) + MOVOU X2, 672(AX) + MOVOU X3, 688(AX) + MOVOU X4, 704(AX) + MOVOU X5, 720(AX) + MOVOU X6, 736(AX) + MOVOU X7, 752(AX) + MOVOU 768(AX), X0 + MOVOU 784(AX), X1 + MOVOU 800(AX), X2 + MOVOU 816(AX), X3 + MOVOU 832(AX), X4 + MOVOU 848(AX), X5 + MOVOU 864(AX), X6 + MOVOU 880(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 768(AX) + MOVOU X1, 784(AX) + MOVOU X2, 800(AX) + MOVOU X3, 816(AX) + MOVOU X4, 832(AX) + MOVOU X5, 848(AX) + MOVOU X6, 864(AX) + MOVOU X7, 880(AX) + MOVOU 896(AX), X0 + MOVOU 912(AX), X1 + MOVOU 928(AX), X2 + MOVOU 944(AX), X3 + MOVOU 960(AX), X4 + MOVOU 976(AX), X5 + MOVOU 992(AX), X6 + MOVOU 1008(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 896(AX) + MOVOU X1, 912(AX) + MOVOU X2, 928(AX) + MOVOU X3, 944(AX) + MOVOU X4, 960(AX) + MOVOU X5, 976(AX) + MOVOU X6, 992(AX) + MOVOU X7, 1008(AX) + MOVOU (AX), X0 + MOVOU 128(AX), X1 + MOVOU 256(AX), X2 + MOVOU 384(AX), X3 + MOVOU 512(AX), X4 + MOVOU 640(AX), X5 + MOVOU 768(AX), X6 + MOVOU 896(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, (AX) + MOVOU X1, 128(AX) + MOVOU X2, 256(AX) + MOVOU X3, 384(AX) + MOVOU X4, 512(AX) + MOVOU X5, 640(AX) + MOVOU X6, 768(AX) + MOVOU X7, 896(AX) + MOVOU 16(AX), X0 + MOVOU 144(AX), X1 + MOVOU 272(AX), X2 + MOVOU 400(AX), X3 + MOVOU 528(AX), X4 + MOVOU 656(AX), X5 + MOVOU 784(AX), X6 + MOVOU 912(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 16(AX) + MOVOU X1, 144(AX) + MOVOU X2, 272(AX) + MOVOU X3, 400(AX) + MOVOU X4, 528(AX) + MOVOU X5, 656(AX) + MOVOU X6, 784(AX) + MOVOU X7, 912(AX) + MOVOU 32(AX), X0 + MOVOU 160(AX), X1 + MOVOU 288(AX), X2 + MOVOU 416(AX), X3 + MOVOU 544(AX), X4 + MOVOU 672(AX), X5 + MOVOU 800(AX), X6 + MOVOU 928(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 32(AX) + MOVOU X1, 160(AX) + MOVOU X2, 288(AX) + MOVOU X3, 416(AX) + MOVOU X4, 544(AX) + MOVOU X5, 672(AX) + MOVOU X6, 800(AX) + MOVOU X7, 928(AX) + MOVOU 48(AX), X0 + MOVOU 176(AX), X1 + MOVOU 304(AX), X2 + MOVOU 432(AX), X3 + MOVOU 560(AX), X4 + MOVOU 688(AX), X5 + MOVOU 816(AX), X6 + MOVOU 944(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 48(AX) + MOVOU X1, 176(AX) + MOVOU X2, 304(AX) + MOVOU X3, 432(AX) + MOVOU X4, 560(AX) + MOVOU X5, 688(AX) + MOVOU X6, 816(AX) + MOVOU X7, 944(AX) + MOVOU 64(AX), X0 + MOVOU 192(AX), X1 + MOVOU 320(AX), X2 + MOVOU 448(AX), X3 + MOVOU 576(AX), X4 + MOVOU 704(AX), X5 + MOVOU 832(AX), X6 + MOVOU 960(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 64(AX) + MOVOU X1, 192(AX) + MOVOU X2, 320(AX) + MOVOU X3, 448(AX) + MOVOU X4, 576(AX) + MOVOU X5, 704(AX) + MOVOU X6, 832(AX) + MOVOU X7, 960(AX) + MOVOU 80(AX), X0 + MOVOU 208(AX), X1 + MOVOU 336(AX), X2 + MOVOU 464(AX), X3 + MOVOU 592(AX), X4 + MOVOU 720(AX), X5 + MOVOU 848(AX), X6 + MOVOU 976(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 80(AX) + MOVOU X1, 208(AX) + MOVOU X2, 336(AX) + MOVOU X3, 464(AX) + MOVOU X4, 592(AX) + MOVOU X5, 720(AX) + MOVOU X6, 848(AX) + MOVOU X7, 976(AX) + MOVOU 96(AX), X0 + MOVOU 224(AX), X1 + MOVOU 352(AX), X2 + MOVOU 480(AX), X3 + MOVOU 608(AX), X4 + MOVOU 736(AX), X5 + MOVOU 864(AX), X6 + MOVOU 992(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 96(AX) + MOVOU X1, 224(AX) + MOVOU X2, 352(AX) + MOVOU X3, 480(AX) + MOVOU X4, 608(AX) + MOVOU X5, 736(AX) + MOVOU X6, 864(AX) + MOVOU X7, 992(AX) + MOVOU 112(AX), X0 + MOVOU 240(AX), X1 + MOVOU 368(AX), X2 + MOVOU 496(AX), X3 + MOVOU 624(AX), X4 + MOVOU 752(AX), X5 + MOVOU 880(AX), X6 + MOVOU 1008(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 112(AX) + MOVOU X1, 240(AX) + MOVOU X2, 368(AX) + MOVOU X3, 496(AX) + MOVOU X4, 624(AX) + MOVOU X5, 752(AX) + MOVOU X6, 880(AX) + MOVOU X7, 1008(AX) + RET - BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) +DATA ·c40<>+0(SB)/8, $0x0201000706050403 +DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), RODATA|NOPTR, $16 - BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) - RET +DATA ·c48<>+0(SB)/8, $0x0100070605040302 +DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), RODATA|NOPTR, $16 -// func mixBlocksSSE2(out, a, b, c *block) -TEXT ·mixBlocksSSE2(SB), 4, $0-32 +// func mixBlocksSSE2(out *block, a *block, b *block, c *block) +// Requires: SSE2 +TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 MOVQ out+0(FP), DX MOVQ a+8(FP), AX MOVQ b+16(FP), BX MOVQ c+24(FP), CX - MOVQ $128, DI + MOVQ $0x00000080, DI loop: - MOVOU 0(AX), X0 - MOVOU 0(BX), X1 - MOVOU 0(CX), X2 + MOVOU (AX), X0 + MOVOU (BX), X1 + MOVOU (CX), X2 PXOR X1, X0 PXOR X2, X0 - MOVOU X0, 0(DX) - ADDQ $16, AX - ADDQ $16, BX - ADDQ $16, CX - ADDQ $16, DX - SUBQ $2, DI + MOVOU X0, (DX) + ADDQ $0x10, AX + ADDQ $0x10, BX + ADDQ $0x10, CX + ADDQ $0x10, DX + SUBQ $0x02, DI JA loop RET -// func xorBlocksSSE2(out, a, b, c *block) -TEXT ·xorBlocksSSE2(SB), 4, $0-32 +// func xorBlocksSSE2(out *block, a *block, b *block, c *block) +// Requires: SSE2 +TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 MOVQ out+0(FP), DX MOVQ a+8(FP), AX MOVQ b+16(FP), BX MOVQ c+24(FP), CX - MOVQ $128, DI + MOVQ $0x00000080, DI loop: - MOVOU 0(AX), X0 - MOVOU 0(BX), X1 - MOVOU 0(CX), X2 - MOVOU 0(DX), X3 + MOVOU (AX), X0 + MOVOU (BX), X1 + MOVOU (CX), X2 + MOVOU (DX), X3 PXOR X1, X0 PXOR X2, X0 PXOR X3, X0 - MOVOU X0, 0(DX) - ADDQ $16, AX - ADDQ $16, BX - ADDQ $16, CX - ADDQ $16, DX - SUBQ $2, DI + MOVOU X0, (DX) + ADDQ $0x10, AX + ADDQ $0x10, BX + ADDQ $0x10, CX + ADDQ $0x10, DX + SUBQ $0x02, DI JA loop RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s index 9ae8206..f75162e 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s @@ -1,722 +1,4517 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 - -#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 -#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 -#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e -#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 -#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 - -#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ - VPADDQ m0, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m1, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y1_Y1; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y3_Y3; \ - VPADDQ m2, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m3, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y3_Y3; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y1_Y1 - -#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E -#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 -#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E -#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 -#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E - -#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n -#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n -#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n -#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n -#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n - -#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 -#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 -#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 -#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 -#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 - -#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 - -#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 -#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 - -// load msg: Y12 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y12, Y12 - -// load msg: Y13 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ - VMOVQ_SI_X13(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X13(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y13, Y13 - -// load msg: Y14 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ - VMOVQ_SI_X14(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X14(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y14, Y14 - -// load msg: Y15 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ - VMOVQ_SI_X15(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X15(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X11(6*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ - LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ - LOAD_MSG_AVX2_Y15(9, 11, 13, 15) - -#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ - LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ - LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ - VMOVQ_SI_X11(11*8); \ - VPSHUFD $0x4E, 0*8(SI), X14; \ - VPINSRQ_1_SI_X11(5*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(12, 2, 7, 3) - -#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ - VMOVQ_SI_X11(5*8); \ - VMOVDQU 11*8(SI), X12; \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - VMOVQ_SI_X13(8*8); \ - VMOVQ_SI_X11(2*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X11(13*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ - LOAD_MSG_AVX2_Y15(14, 6, 1, 4) - -#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ - LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ - LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ - LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ - VMOVQ_SI_X15(6*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X15(10*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ - LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X13(7*8); \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ - LOAD_MSG_AVX2_Y15(1, 12, 8, 13) - -#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ - LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ - LOAD_MSG_AVX2_Y15(13, 5, 14, 9) - -#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ - LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ - LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ - VMOVQ_SI_X14_0; \ - VPSHUFD $0x4E, 8*8(SI), X11; \ - VPINSRQ_1_SI_X14(6*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(7, 3, 2, 11) - -#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ - LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ - LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ - LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ - VMOVQ_SI_X15_0; \ - VMOVQ_SI_X11(6*8); \ - VPINSRQ_1_SI_X15(4*8); \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ - VMOVQ_SI_X12(6*8); \ - VMOVQ_SI_X11(11*8); \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ - VMOVQ_SI_X11(1*8); \ - VMOVDQU 12*8(SI), X14; \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - VMOVQ_SI_X15(2*8); \ - VMOVDQU 4*8(SI), X11; \ - VPINSRQ_1_SI_X15(7*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ - LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ - VMOVQ_SI_X13(2*8); \ - VPSHUFD $0x4E, 5*8(SI), X11; \ - VPINSRQ_1_SI_X13(4*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ - VMOVQ_SI_X15(11*8); \ - VMOVQ_SI_X11(12*8); \ - VPINSRQ_1_SI_X15(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y15, Y15 - // func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, DX - ADDQ $31, DX - ANDQ $~31, DX - - MOVQ CX, 16(DX) - XORQ CX, CX - MOVQ CX, 24(DX) - - VMOVDQU ·AVX2_c40<>(SB), Y4 - VMOVDQU ·AVX2_c48<>(SB), Y5 - - VMOVDQU 0(AX), Y8 +// Requires: AVX, AVX2 +TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, DX + ADDQ $+31, DX + ANDQ $-32, DX + MOVQ CX, 16(DX) + XORQ CX, CX + MOVQ CX, 24(DX) + VMOVDQU ·AVX2_c40<>+0(SB), Y4 + VMOVDQU ·AVX2_c48<>+0(SB), Y5 + VMOVDQU (AX), Y8 VMOVDQU 32(AX), Y9 - VMOVDQU ·AVX2_iv0<>(SB), Y6 - VMOVDQU ·AVX2_iv1<>(SB), Y7 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 - MOVQ R9, 8(DX) + VMOVDQU ·AVX2_iv0<>+0(SB), Y6 + VMOVDQU ·AVX2_iv1<>+0(SB), Y7 + MOVQ (BX), R8 + MOVQ 8(BX), R9 + MOVQ R9, 8(DX) loop: - ADDQ $128, R8 - MOVQ R8, 0(DX) - CMPQ R8, $128 + ADDQ $0x80, R8 + MOVQ R8, (DX) + CMPQ R8, $0x80 JGE noinc INCQ R9 MOVQ R9, 8(DX) noinc: - VMOVDQA Y8, Y0 - VMOVDQA Y9, Y1 - VMOVDQA Y6, Y2 - VPXOR 0(DX), Y7, Y3 - - LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() - VMOVDQA Y12, 32(DX) - VMOVDQA Y13, 64(DX) - VMOVDQA Y14, 96(DX) - VMOVDQA Y15, 128(DX) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() - VMOVDQA Y12, 160(DX) - VMOVDQA Y13, 192(DX) - VMOVDQA Y14, 224(DX) - VMOVDQA Y15, 256(DX) - - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - - ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5) - ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5) - - VPXOR Y0, Y8, Y8 - VPXOR Y1, Y9, Y9 - VPXOR Y2, Y8, Y8 - VPXOR Y3, Y9, Y9 - - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop - - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - - VMOVDQU Y8, 0(AX) - VMOVDQU Y9, 32(AX) + VMOVDQA Y8, Y0 + VMOVDQA Y9, Y1 + VMOVDQA Y6, Y2 + VPXOR (DX), Y7, Y3 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 32(DX) + VMOVDQA Y13, 64(DX) + VMOVDQA Y14, 96(DX) + VMOVDQA Y15, 128(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x48 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + VPSHUFD $0x4e, (SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x28 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 160(DX) + VMOVDQA Y13, 192(DX) + VMOVDQA Y14, 224(DX) + VMOVDQA Y15, 256(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + VMOVDQU 88(SI), X12 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + VPSHUFD $0x4e, 64(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x10 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + VMOVDQU 96(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + VMOVDQU 32(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + VPSHUFD $0x4e, 40(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 32(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 64(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 96(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 128(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 160(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 192(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 224(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 256(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPXOR Y0, Y8, Y8 + VPXOR Y1, Y9, Y9 + VPXOR Y2, Y8, Y8 + VPXOR Y3, Y9, Y9 + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VMOVDQU Y8, (AX) + VMOVDQU Y9, 32(AX) VZEROUPPER - RET -#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA -#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB -#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF -#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD -#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE - -#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF -#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF - -#define SHUFFLE_AVX() \ - VMOVDQA X6, X13; \ - VMOVDQA X2, X14; \ - VMOVDQA X4, X6; \ - VPUNPCKLQDQ_X13_X13_X15; \ - VMOVDQA X5, X4; \ - VMOVDQA X6, X5; \ - VPUNPCKHQDQ_X15_X7_X6; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X13_X7; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VPUNPCKHQDQ_X15_X2_X2; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X3_X3; \ - -#define SHUFFLE_AVX_INV() \ - VMOVDQA X2, X13; \ - VMOVDQA X4, X14; \ - VPUNPCKLQDQ_X2_X2_X15; \ - VMOVDQA X5, X4; \ - VPUNPCKHQDQ_X15_X3_X2; \ - VMOVDQA X14, X5; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VMOVDQA X6, X14; \ - VPUNPCKHQDQ_X15_X13_X3; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X6_X6; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X7_X7; \ - -#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - VPADDQ m0, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m1, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFD $-79, v6, v6; \ - VPSHUFD $-79, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPSHUFB c40, v2, v2; \ - VPSHUFB c40, v3, v3; \ - VPADDQ m2, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m3, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFB c48, v6, v6; \ - VPSHUFB c48, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPADDQ v2, v2, t0; \ - VPSRLQ $63, v2, v2; \ - VPXOR t0, v2, v2; \ - VPADDQ v3, v3, t0; \ - VPSRLQ $63, v3, v3; \ - VPXOR t0, v3, v3 - -// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) -// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 -#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X13(i2*8); \ - VMOVQ_SI_X14(i4*8); \ - VMOVQ_SI_X15(i6*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X13(i3*8); \ - VPINSRQ_1_SI_X14(i5*8); \ - VPINSRQ_1_SI_X15(i7*8) - -// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) -#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(1*8); \ - VMOVQ_SI_X15(5*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X13(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(7*8) - -// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) -#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ - VPSHUFD $0x4E, 0*8(SI), X12; \ - VMOVQ_SI_X13(11*8); \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(7*8); \ - VPINSRQ_1_SI_X13(5*8); \ - VPINSRQ_1_SI_X14(2*8); \ - VPINSRQ_1_SI_X15(3*8) - -// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) -#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ - VMOVDQU 11*8(SI), X12; \ - VMOVQ_SI_X13(5*8); \ - VMOVQ_SI_X14(8*8); \ - VMOVQ_SI_X15(2*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14_0; \ - VPINSRQ_1_SI_X15(13*8) - -// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) -#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(6*8); \ - VMOVQ_SI_X15_0; \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(8*8) +DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) -#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ - VMOVQ_SI_X12(9*8); \ - VMOVQ_SI_X13(2*8); \ - VMOVQ_SI_X14_0; \ - VMOVQ_SI_X15(4*8); \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VPINSRQ_1_SI_X15(15*8) +DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) -#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(11*8); \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X13(8*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(3*8) +DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) -#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ - MOVQ 0*8(SI), X12; \ - VPSHUFD $0x4E, 8*8(SI), X13; \ - MOVQ 7*8(SI), X14; \ - MOVQ 2*8(SI), X15; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(11*8) - -// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) -#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ - MOVQ 6*8(SI), X12; \ - MOVQ 11*8(SI), X13; \ - MOVQ 15*8(SI), X14; \ - MOVQ 3*8(SI), X15; \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X14(9*8); \ - VPINSRQ_1_SI_X15(8*8) - -// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) -#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ - MOVQ 5*8(SI), X12; \ - MOVQ 8*8(SI), X13; \ - MOVQ 0*8(SI), X14; \ - MOVQ 6*8(SI), X15; \ - VPINSRQ_1_SI_X12(15*8); \ - VPINSRQ_1_SI_X13(2*8); \ - VPINSRQ_1_SI_X14(4*8); \ - VPINSRQ_1_SI_X15(10*8) - -// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) -#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ - VMOVDQU 12*8(SI), X12; \ - MOVQ 1*8(SI), X13; \ - MOVQ 2*8(SI), X14; \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VMOVDQU 4*8(SI), X15 - -// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) -#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ - MOVQ 15*8(SI), X12; \ - MOVQ 3*8(SI), X13; \ - MOVQ 11*8(SI), X14; \ - MOVQ 12*8(SI), X15; \ - VPINSRQ_1_SI_X12(9*8); \ - VPINSRQ_1_SI_X13(13*8); \ - VPINSRQ_1_SI_X14(14*8); \ - VPINSRQ_1_SI_X15_0 +DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f +DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32 // func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - VMOVDQU ·AVX_c40<>(SB), X0 - VMOVDQU ·AVX_c48<>(SB), X1 +// Requires: AVX, SSE2 +TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + VMOVDQU ·AVX_c40<>+0(SB), X0 + VMOVDQU ·AVX_c48<>+0(SB), X1 VMOVDQA X0, X8 VMOVDQA X1, X9 - - VMOVDQU ·AVX_iv3<>(SB), X0 - VMOVDQA X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0) - - VMOVDQU 0(AX), X10 + VMOVDQU ·AVX_iv3<>+0(SB), X0 + VMOVDQA X0, (R10) + XORQ CX, (R10) + VMOVDQU (AX), X10 VMOVDQU 16(AX), X11 VMOVDQU 32(AX), X2 VMOVDQU 48(AX), X3 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - VMOVQ_R8_X15 - VPINSRQ_1_R9_X15 - + BYTE $0xc4 + BYTE $0x41 + BYTE $0xf9 + BYTE $0x6e + BYTE $0xf8 + BYTE $0xc4 + BYTE $0x43 + BYTE $0x81 + BYTE $0x22 + BYTE $0xf9 + BYTE $0x01 VMOVDQA X10, X0 VMOVDQA X11, X1 - VMOVDQU ·AVX_iv0<>(SB), X4 - VMOVDQU ·AVX_iv1<>(SB), X5 - VMOVDQU ·AVX_iv2<>(SB), X6 - + VMOVDQU ·AVX_iv0<>+0(SB), X4 + VMOVDQU ·AVX_iv1<>+0(SB), X5 + VMOVDQU ·AVX_iv2<>+0(SB), X6 VPXOR X15, X6, X6 - VMOVDQA 0(R10), X7 - - LOAD_MSG_AVX_0_2_4_6_1_3_5_7() + VMOVDQA (R10), X7 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 VMOVDQA X12, 16(R10) VMOVDQA X13, 32(R10) VMOVDQA X14, 48(R10) VMOVDQA X15, 64(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 VMOVDQA X12, 80(R10) VMOVDQA X13, 96(R10) VMOVDQA X14, 112(R10) VMOVDQA X15, 128(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 VMOVDQA X12, 144(R10) VMOVDQA X13, 160(R10) VMOVDQA X14, 176(R10) VMOVDQA X15, 192(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_1_0_11_5_12_2_7_3() + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPSHUFD $0x4e, (SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 VMOVDQA X12, 208(R10) VMOVDQA X13, 224(R10) VMOVDQA X14, 240(R10) VMOVDQA X15, 256(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_11_12_5_15_8_0_2_13() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_2_5_4_15_6_10_0_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_9_5_2_10_0_7_4_15() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_2_6_0_8_12_10_11_3() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_0_6_9_8_7_3_2_11() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_5_15_8_2_0_4_6_10() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_6_14_11_0_15_9_3_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_12_13_1_10_2_7_4_5() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_15_9_3_13_11_14_12_0() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VMOVDQU 88(SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x36 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ (SI), X12 + VPSHUFD $0x4e, 64(SI), X13 + MOVQ 56(SI), X14 + MOVQ 16(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 40(SI), X12 + MOVQ 64(SI), X13 + MOVQ (SI), X14 + MOVQ 48(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + MOVQ 48(SI), X12 + MOVQ 88(SI), X13 + MOVQ 120(SI), X14 + MOVQ 24(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VMOVDQU 96(SI), X12 + MOVQ 8(SI), X13 + MOVQ 16(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + VMOVDQU 32(SI), X15 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 120(SI), X12 + MOVQ 24(SI), X13 + MOVQ 88(SI), X14 + MOVQ 96(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x3e + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 16(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 32(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 48(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 64(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 80(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 96(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 112(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 128(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 144(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 160(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 176(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 192(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 208(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 224(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 240(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 256(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff VMOVDQU 32(AX), X14 VMOVDQU 48(AX), X15 VPXOR X0, X10, X10 @@ -729,16 +4524,36 @@ noinc: VPXOR X7, X15, X3 VMOVDQU X2, 32(AX) VMOVDQU X3, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + VMOVDQU X10, (AX) + VMOVDQU X11, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VZEROUPPER + RET - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16 - VMOVDQU X10, 0(AX) - VMOVDQU X11, 16(AX) +DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - VZEROUPPER +DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16 - RET +DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s index adfac00..9a0ce21 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s @@ -1,278 +1,1441 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - PADDQ m0, v0; \ - PADDQ m1, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v6, v6; \ - PSHUFD $0xB1, v7, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - PSHUFB c40, v2; \ - PSHUFB c40, v3; \ - PADDQ m2, v0; \ - PADDQ m3, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFB c48, v6; \ - PSHUFB c48, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - MOVOU v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVOU v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ - MOVQ i0*8(src), m0; \ - PINSRQ $1, i1*8(src), m0; \ - MOVQ i2*8(src), m1; \ - PINSRQ $1, i3*8(src), m1; \ - MOVQ i4*8(src), m2; \ - PINSRQ $1, i5*8(src), m2; \ - MOVQ i6*8(src), m3; \ - PINSRQ $1, i7*8(src), m3 - // func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - MOVOU ·iv3<>(SB), X0 - MOVO X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0) - - MOVOU ·c40<>(SB), X13 - MOVOU ·c48<>(SB), X14 - - MOVOU 0(AX), X12 +// Requires: SSE2, SSE4.1, SSSE3 +TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + MOVOU ·iv3<>+0(SB), X0 + MOVO X0, (R10) + XORQ CX, (R10) + MOVOU ·c40<>+0(SB), X13 + MOVOU ·c48<>+0(SB), X14 + MOVOU (AX), X12 MOVOU 16(AX), X15 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - MOVQ R8, X8 - PINSRQ $1, R9, X8 - - MOVO X12, X0 - MOVO X15, X1 - MOVOU 32(AX), X2 - MOVOU 48(AX), X3 - MOVOU ·iv0<>(SB), X4 - MOVOU ·iv1<>(SB), X5 - MOVOU ·iv2<>(SB), X6 - - PXOR X8, X6 - MOVO 0(R10), X7 - - LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) - MOVO X8, 16(R10) - MOVO X9, 32(R10) - MOVO X10, 48(R10) - MOVO X11, 64(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) - MOVO X8, 80(R10) - MOVO X9, 96(R10) - MOVO X10, 112(R10) - MOVO X11, 128(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) - MOVO X8, 144(R10) - MOVO X9, 160(R10) - MOVO X10, 176(R10) - MOVO X11, 192(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) - MOVO X8, 208(R10) - MOVO X9, 224(R10) - MOVO X10, 240(R10) - MOVO X11, 256(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + MOVQ R8, X8 + PINSRQ $0x01, R9, X8 + MOVO X12, X0 + MOVO X15, X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU ·iv0<>+0(SB), X4 + MOVOU ·iv1<>+0(SB), X5 + MOVOU ·iv2<>+0(SB), X6 + PXOR X8, X6 + MOVO (R10), X7 + MOVQ (SI), X8 + PINSRQ $0x01, 16(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 48(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 40(SI), X11 + PINSRQ $0x01, 56(SI), X11 + MOVO X8, 16(R10) + MOVO X9, 32(R10) + MOVO X10, 48(R10) + MOVO X11, 64(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 64(SI), X8 + PINSRQ $0x01, 80(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 112(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 88(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 120(SI), X11 + MOVO X8, 80(R10) + MOVO X9, 96(R10) + MOVO X10, 112(R10) + MOVO X11, 128(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 112(SI), X8 + PINSRQ $0x01, 32(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 80(SI), X10 + PINSRQ $0x01, 64(SI), X10 + MOVQ 120(SI), X11 + PINSRQ $0x01, 48(SI), X11 + MOVO X8, 144(R10) + MOVO X9, 160(R10) + MOVO X10, 176(R10) + MOVO X11, 192(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 8(SI), X8 + PINSRQ $0x01, (SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, 40(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 16(SI), X10 + MOVQ 56(SI), X11 + PINSRQ $0x01, 24(SI), X11 + MOVO X8, 208(R10) + MOVO X9, 224(R10) + MOVO X10, 240(R10) + MOVO X11, 256(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 88(SI), X8 + PINSRQ $0x01, 96(SI), X8 + MOVQ 40(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 64(SI), X10 + PINSRQ $0x01, (SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 80(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 72(SI), X9 + MOVQ 112(SI), X10 + PINSRQ $0x01, 48(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 32(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 56(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 104(SI), X9 + PINSRQ $0x01, 88(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 8(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, 112(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 16(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 48(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ (SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 72(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 16(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 120(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 112(SI), X8 + PINSRQ $0x01, 88(SI), X8 + MOVQ 48(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 96(SI), X10 + MOVQ 64(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 16(SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ (SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ 88(SI), X11 + PINSRQ $0x01, 24(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 32(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 120(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 104(SI), X10 + PINSRQ $0x01, 40(SI), X10 + MOVQ 112(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 96(SI), X8 + PINSRQ $0x01, 8(SI), X8 + MOVQ 112(SI), X9 + PINSRQ $0x01, 32(SI), X9 + MOVQ 40(SI), X10 + PINSRQ $0x01, 120(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ (SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 56(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 88(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 104(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 40(SI), X8 + PINSRQ $0x01, 120(SI), X8 + MOVQ 64(SI), X9 + PINSRQ $0x01, 16(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 48(SI), X8 + PINSRQ $0x01, 112(SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, (SI), X9 + MOVQ 120(SI), X10 + PINSRQ $0x01, 72(SI), X10 + MOVQ 24(SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 96(SI), X8 + PINSRQ $0x01, 104(SI), X8 + MOVQ 8(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 80(SI), X8 + PINSRQ $0x01, 64(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 120(SI), X8 + PINSRQ $0x01, 72(SI), X8 + MOVQ 24(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, (SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 16(R10), X0 + PADDQ 32(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 48(R10), X0 + PADDQ 64(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 80(R10), X0 + PADDQ 96(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 112(R10), X0 + PADDQ 128(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 144(R10), X0 + PADDQ 160(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 176(R10), X0 + PADDQ 192(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 208(R10), X0 + PADDQ 224(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 240(R10), X0 + PADDQ 256(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU 32(AX), X10 + MOVOU 48(AX), X11 + PXOR X0, X12 + PXOR X1, X15 + PXOR X2, X10 + PXOR X3, X11 + PXOR X4, X12 + PXOR X5, X15 + PXOR X6, X10 + PXOR X7, X11 + MOVOU X10, 32(AX) + MOVOU X11, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVOU X12, (AX) + MOVOU X15, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + RET - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) +DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·iv3<>(SB), RODATA|NOPTR, $16 - MOVOU 32(AX), X10 - MOVOU 48(AX), X11 - PXOR X0, X12 - PXOR X1, X15 - PXOR X2, X10 - PXOR X3, X11 - PXOR X4, X12 - PXOR X5, X15 - PXOR X6, X10 - PXOR X7, X11 - MOVOU X10, 32(AX) - MOVOU X11, 48(AX) +DATA ·c40<>+0(SB)/8, $0x0201000706050403 +DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), RODATA|NOPTR, $16 - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·c48<>+0(SB)/8, $0x0100070605040302 +DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), RODATA|NOPTR, $16 - MOVOU X12, 0(AX) - MOVOU X15, 16(AX) +DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·iv0<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) +DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·iv1<>(SB), RODATA|NOPTR, $16 - RET +DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s new file mode 100644 index 0000000..ec2acfe --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s @@ -0,0 +1,17 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +#include "textflag.h" + +TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctl(SB) +GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) + +TEXT libc_sysctlbyname_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctlbyname(SB) +GLOBL ·libc_sysctlbyname_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctlbyname_trampoline_addr(SB)/8, $libc_sysctlbyname_trampoline<>(SB) diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go index ec07aab..02609d5 100644 --- a/vendor/golang.org/x/sys/cpu/cpu.go +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -201,6 +201,25 @@ var S390X struct { _ CacheLinePad } +// RISCV64 contains the supported CPU features and performance characteristics for riscv64 +// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate +// the presence of RISC-V extensions. +// +// It is safe to assume that all the RV64G extensions are supported and so they are omitted from +// this structure. As riscv64 Go programs require at least RV64G, the code that populates +// this structure cannot run successfully if some of the RV64G extensions are missing. +// The struct is padded to avoid false sharing. +var RISCV64 struct { + _ CacheLinePad + HasFastMisaligned bool // Fast misaligned accesses + HasC bool // Compressed instruction-set extension + HasV bool // Vector extension compatible with RVV 1.0 + HasZba bool // Address generation instructions extension + HasZbb bool // Basic bit-manipulation extension + HasZbs bool // Single-bit instructions extension + _ CacheLinePad +} + func init() { archInit() initOptions() diff --git a/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go new file mode 100644 index 0000000..b838cb9 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +package cpu + +// darwinSupportsAVX512 checks Darwin kernel for AVX512 support via sysctl +// call (see issue 43089). It also restricts AVX512 support for Darwin to +// kernel version 21.3.0 (MacOS 12.2.0) or later (see issue 49233). +// +// Background: +// Darwin implements a special mechanism to economize on thread state when +// AVX512 specific registers are not in use. This scheme minimizes state when +// preempting threads that haven't yet used any AVX512 instructions, but adds +// special requirements to check for AVX512 hardware support at runtime (e.g. +// via sysctl call or commpage inspection). See issue 43089 and link below for +// full background: +// https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.1.10/osfmk/i386/fpu.c#L214-L240 +// +// Additionally, all versions of the Darwin kernel from 19.6.0 through 21.2.0 +// (corresponding to MacOS 10.15.6 - 12.1) have a bug that can cause corruption +// of the AVX512 mask registers (K0-K7) upon signal return. For this reason +// AVX512 is considered unsafe to use on Darwin for kernel versions prior to +// 21.3.0, where a fix has been confirmed. See issue 49233 for full background. +func darwinSupportsAVX512() bool { + return darwinSysctlEnabled([]byte("hw.optional.avx512f\x00")) && darwinKernelVersionCheck(21, 3, 0) +} + +// Ensure Darwin kernel version is at least major.minor.patch, avoiding dependencies +func darwinKernelVersionCheck(major, minor, patch int) bool { + var release [256]byte + err := darwinOSRelease(&release) + if err != nil { + return false + } + + var mmp [3]int + c := 0 +Loop: + for _, b := range release[:] { + switch { + case b >= '0' && b <= '9': + mmp[c] = 10*mmp[c] + int(b-'0') + case b == '.': + c++ + if c > 2 { + return false + } + case b == 0: + break Loop + default: + return false + } + } + if c != 2 { + return false + } + return mmp[0] > major || mmp[0] == major && (mmp[1] > minor || mmp[1] == minor && mmp[2] >= patch) +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go index 910728f..32a4451 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go @@ -6,10 +6,10 @@ package cpu -// cpuid is implemented in cpu_x86.s for gc compiler +// cpuid is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) -// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler +// xgetbv with ecx = 0 is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func xgetbv() (eax, edx uint32) diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.s b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s similarity index 94% rename from vendor/golang.org/x/sys/cpu/cpu_x86.s rename to vendor/golang.org/x/sys/cpu/cpu_gc_x86.s index 7d7ba33..ce208ce 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_x86.s +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s @@ -18,7 +18,7 @@ TEXT ·cpuid(SB), NOSPLIT, $0-24 RET // func xgetbv() (eax, edx uint32) -TEXT ·xgetbv(SB),NOSPLIT,$0-8 +TEXT ·xgetbv(SB), NOSPLIT, $0-8 MOVL $0, CX XGETBV MOVL AX, eax+0(FP) diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go index 99c60fe..170d21d 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go @@ -23,9 +23,3 @@ func xgetbv() (eax, edx uint32) { gccgoXgetbv(&a, &d) return a, d } - -// gccgo doesn't build on Darwin, per: -// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76 -func darwinSupportsAVX512() bool { - return false -} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go index 08f35ea..f1caf0f 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -110,7 +110,6 @@ func doinit() { ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) ARM64.HasDIT = isSet(hwCap, hwcap_DIT) - // HWCAP2 feature bits ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2) ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM) diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go index cd63e73..7d902b6 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x +//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64 package cpu diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go new file mode 100644 index 0000000..cb4a0c5 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "syscall" + "unsafe" +) + +// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe +// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available. +// +// A note on detection of the Vector extension using HWCAP. +// +// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. +// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe +// syscall is not available then neither is the Vector extension (which needs kernel support). +// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. +// However, some RISC-V board manufacturers ship boards with an older kernel on top of which +// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe +// patches. These kernels advertise support for the Vector extension using HWCAP. Falling +// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not +// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. +// +// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by +// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board +// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified +// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use +// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector +// extension are binary incompatible. HWCAP can then not be used in isolation to populate the +// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. +// +// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector +// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype +// register. This check would allow us to safely detect version 1.0 of the Vector extension +// with HWCAP, if riscv_hwprobe were not available. However, the check cannot +// be added until the assembler supports the Vector instructions. +// +// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the +// extensions it advertises support for are explicitly versioned. It's also worth noting that +// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba. +// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority +// of RISC-V extensions. +// +// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. + +// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must +// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall +// here. + +const ( + // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. + riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 + riscv_HWPROBE_IMA_C = 0x2 + riscv_HWPROBE_IMA_V = 0x4 + riscv_HWPROBE_EXT_ZBA = 0x8 + riscv_HWPROBE_EXT_ZBB = 0x10 + riscv_HWPROBE_EXT_ZBS = 0x20 + riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 + riscv_HWPROBE_MISALIGNED_FAST = 0x3 + riscv_HWPROBE_MISALIGNED_MASK = 0x7 +) + +const ( + // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. + sys_RISCV_HWPROBE = 258 +) + +// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. +type riscvHWProbePairs struct { + key int64 + value uint64 +} + +const ( + // CPU features + hwcap_RISCV_ISA_C = 1 << ('C' - 'A') +) + +func doinit() { + // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key + // field should be initialised with one of the key constants defined above, e.g., + // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. + // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. + + pairs := []riscvHWProbePairs{ + {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, + {riscv_HWPROBE_KEY_CPUPERF_0, 0}, + } + + // This call only indicates that extensions are supported if they are implemented on all cores. + if riscvHWProbe(pairs, 0) { + if pairs[0].key != -1 { + v := uint(pairs[0].value) + RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C) + RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) + RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA) + RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB) + RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS) + } + if pairs[1].key != -1 { + v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK + RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST + } + } + + // Let's double check with HWCAP if the C extension does not appear to be supported. + // This may happen if we're running on a kernel older than 6.4. + + if !RISCV64.HasC { + RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C) + } +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +// riscvHWProbe is a simplified version of the generated wrapper function found in +// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the +// cpuCount and cpus parameters which we do not need. We always want to pass 0 for +// these parameters here so the kernel only reports the extensions that are present +// on all cores. +func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { + var _zero uintptr + var p0 unsafe.Pointer + if len(pairs) > 0 { + p0 = unsafe.Pointer(&pairs[0]) + } else { + p0 = unsafe.Pointer(&_zero) + } + + _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0) + return e1 == 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_x86.go b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go new file mode 100644 index 0000000..a0fd7e2 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build 386 || amd64p32 || (amd64 && (!darwin || !gc)) + +package cpu + +func darwinSupportsAVX512() bool { + panic("only implemented for gc && amd64 && darwin") +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go index 7f0c79c..aca3199 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go @@ -8,4 +8,13 @@ package cpu const cacheLineSize = 64 -func initOptions() {} +func initOptions() { + options = []option{ + {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, + {Name: "c", Feature: &RISCV64.HasC}, + {Name: "v", Feature: &RISCV64.HasV}, + {Name: "zba", Feature: &RISCV64.HasZba}, + {Name: "zbb", Feature: &RISCV64.HasZbb}, + {Name: "zbs", Feature: &RISCV64.HasZbs}, + } +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go index c29f5e4..600a680 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go @@ -92,10 +92,8 @@ func archInit() { osSupportsAVX = isSet(1, eax) && isSet(2, eax) if runtime.GOOS == "darwin" { - // Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers. - // Since users can't rely on mask register contents, let's not advertise AVX-512 support. - // See issue 49233. - osSupportsAVX512 = false + // Darwin requires special AVX512 checks, see cpu_darwin_x86.go + osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512() } else { // Check if OPMASK and ZMM registers have OS support. osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax) diff --git a/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go new file mode 100644 index 0000000..4d0888b --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go @@ -0,0 +1,98 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Minimal copy of x/sys/unix so the cpu package can make a +// system call on Darwin without depending on x/sys/unix. + +//go:build darwin && amd64 && gc + +package cpu + +import ( + "syscall" + "unsafe" +) + +type _C_int int32 + +// adapted from unix.Uname() at x/sys/unix/syscall_darwin.go L419 +func darwinOSRelease(release *[256]byte) error { + // from x/sys/unix/zerrors_openbsd_amd64.go + const ( + CTL_KERN = 0x1 + KERN_OSRELEASE = 0x2 + ) + + mib := []_C_int{CTL_KERN, KERN_OSRELEASE} + n := unsafe.Sizeof(*release) + + return sysctl(mib, &release[0], &n, nil, 0) +} + +type Errno = syscall.Errno + +var _zero uintptr // Single-word zero for use when we need a valid pointer to 0 bytes. + +// from x/sys/unix/zsyscall_darwin_amd64.go L791-807 +func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + var _p0 unsafe.Pointer + if len(mib) > 0 { + _p0 = unsafe.Pointer(&mib[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + if _, _, err := syscall_syscall6( + libc_sysctl_trampoline_addr, + uintptr(_p0), + uintptr(len(mib)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + ); err != 0 { + return err + } + + return nil +} + +var libc_sysctl_trampoline_addr uintptr + +// adapted from internal/cpu/cpu_arm64_darwin.go +func darwinSysctlEnabled(name []byte) bool { + out := int32(0) + nout := unsafe.Sizeof(out) + if ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); ret != nil { + return false + } + return out > 0 +} + +//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib" + +var libc_sysctlbyname_trampoline_addr uintptr + +// adapted from runtime/sys_darwin.go in the pattern of sysctl() above, as defined in x/sys/unix +func sysctlbyname(name *byte, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + if _, _, err := syscall_syscall6( + libc_sysctlbyname_trampoline_addr, + uintptr(unsafe.Pointer(name)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + 0, + ); err != 0 { + return err + } + + return nil +} + +//go:cgo_import_dynamic libc_sysctlbyname sysctlbyname "/usr/lib/libSystem.B.dylib" + +// Implemented in the runtime package (runtime/sys_darwin.go) +func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) + +//go:linkname syscall_syscall6 syscall.syscall6 diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md index 7d3c060..6e08a76 100644 --- a/vendor/golang.org/x/sys/unix/README.md +++ b/vendor/golang.org/x/sys/unix/README.md @@ -156,7 +156,7 @@ from the generated architecture-specific files listed below, and merge these into a common file for each OS. The merge is performed in the following steps: -1. Construct the set of common code that is idential in all architecture-specific files. +1. Construct the set of common code that is identical in all architecture-specific files. 2. Write this common code to the merged file. 3. Remove the common code from all architecture-specific files. diff --git a/vendor/golang.org/x/sys/unix/ioctl_linux.go b/vendor/golang.org/x/sys/unix/ioctl_linux.go index dbe680e..7ca4fa1 100644 --- a/vendor/golang.org/x/sys/unix/ioctl_linux.go +++ b/vendor/golang.org/x/sys/unix/ioctl_linux.go @@ -58,6 +58,102 @@ func IoctlGetEthtoolDrvinfo(fd int, ifname string) (*EthtoolDrvinfo, error) { return &value, err } +// IoctlGetEthtoolTsInfo fetches ethtool timestamping and PHC +// association for the network device specified by ifname. +func IoctlGetEthtoolTsInfo(fd int, ifname string) (*EthtoolTsInfo, error) { + ifr, err := NewIfreq(ifname) + if err != nil { + return nil, err + } + + value := EthtoolTsInfo{Cmd: ETHTOOL_GET_TS_INFO} + ifrd := ifr.withData(unsafe.Pointer(&value)) + + err = ioctlIfreqData(fd, SIOCETHTOOL, &ifrd) + return &value, err +} + +// IoctlGetHwTstamp retrieves the hardware timestamping configuration +// for the network device specified by ifname. +func IoctlGetHwTstamp(fd int, ifname string) (*HwTstampConfig, error) { + ifr, err := NewIfreq(ifname) + if err != nil { + return nil, err + } + + value := HwTstampConfig{} + ifrd := ifr.withData(unsafe.Pointer(&value)) + + err = ioctlIfreqData(fd, SIOCGHWTSTAMP, &ifrd) + return &value, err +} + +// IoctlSetHwTstamp updates the hardware timestamping configuration for +// the network device specified by ifname. +func IoctlSetHwTstamp(fd int, ifname string, cfg *HwTstampConfig) error { + ifr, err := NewIfreq(ifname) + if err != nil { + return err + } + ifrd := ifr.withData(unsafe.Pointer(cfg)) + return ioctlIfreqData(fd, SIOCSHWTSTAMP, &ifrd) +} + +// FdToClockID derives the clock ID from the file descriptor number +// - see clock_gettime(3), FD_TO_CLOCKID macros. The resulting ID is +// suitable for system calls like ClockGettime. +func FdToClockID(fd int) int32 { return int32((int(^fd) << 3) | 3) } + +// IoctlPtpClockGetcaps returns the description of a given PTP device. +func IoctlPtpClockGetcaps(fd int) (*PtpClockCaps, error) { + var value PtpClockCaps + err := ioctlPtr(fd, PTP_CLOCK_GETCAPS2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpSysOffsetPrecise returns a description of the clock +// offset compared to the system clock. +func IoctlPtpSysOffsetPrecise(fd int) (*PtpSysOffsetPrecise, error) { + var value PtpSysOffsetPrecise + err := ioctlPtr(fd, PTP_SYS_OFFSET_PRECISE2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpSysOffsetExtended returns an extended description of the +// clock offset compared to the system clock. The samples parameter +// specifies the desired number of measurements. +func IoctlPtpSysOffsetExtended(fd int, samples uint) (*PtpSysOffsetExtended, error) { + value := PtpSysOffsetExtended{Samples: uint32(samples)} + err := ioctlPtr(fd, PTP_SYS_OFFSET_EXTENDED2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpPinGetfunc returns the configuration of the specified +// I/O pin on given PTP device. +func IoctlPtpPinGetfunc(fd int, index uint) (*PtpPinDesc, error) { + value := PtpPinDesc{Index: uint32(index)} + err := ioctlPtr(fd, PTP_PIN_GETFUNC2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpPinSetfunc updates configuration of the specified PTP +// I/O pin. +func IoctlPtpPinSetfunc(fd int, pd *PtpPinDesc) error { + return ioctlPtr(fd, PTP_PIN_SETFUNC2, unsafe.Pointer(pd)) +} + +// IoctlPtpPeroutRequest configures the periodic output mode of the +// PTP I/O pins. +func IoctlPtpPeroutRequest(fd int, r *PtpPeroutRequest) error { + return ioctlPtr(fd, PTP_PEROUT_REQUEST2, unsafe.Pointer(r)) +} + +// IoctlPtpExttsRequest configures the external timestamping mode +// of the PTP I/O pins. +func IoctlPtpExttsRequest(fd int, r *PtpExttsRequest) error { + return ioctlPtr(fd, PTP_EXTTS_REQUEST2, unsafe.Pointer(r)) +} + // IoctlGetWatchdogInfo fetches information about a watchdog device from the // Linux watchdog API. For more information, see: // https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html. diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index d07dd09..6ab02b6 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -158,6 +158,16 @@ includes_Linux=' #endif #define _GNU_SOURCE +// See the description in unix/linux/types.go +#if defined(__ARM_EABI__) || \ + (defined(__mips__) && (_MIPS_SIM == _ABIO32)) || \ + (defined(__powerpc__) && (!defined(__powerpc64__))) +# ifdef _TIME_BITS +# undef _TIME_BITS +# endif +# define _TIME_BITS 32 +#endif + // is broken on powerpc64, as it fails to include definitions of // these structures. We just include them copied from . #if defined(__powerpc__) @@ -256,6 +266,7 @@ struct ltchars { #include #include #include +#include #include #include #include @@ -527,6 +538,7 @@ ccflags="$@" $2 ~ /^(AF|SOCK|SO|SOL|IPPROTO|IP|IPV6|TCP|MCAST|EVFILT|NOTE|SHUT|PROT|MAP|MREMAP|MFD|T?PACKET|MSG|SCM|MCL|DT|MADV|PR|LOCAL|TCPOPT|UDP)_/ || $2 ~ /^NFC_(GENL|PROTO|COMM|RF|SE|DIRECTION|LLCP|SOCKPROTO)_/ || $2 ~ /^NFC_.*_(MAX)?SIZE$/ || + $2 ~ /^PTP_/ || $2 ~ /^RAW_PAYLOAD_/ || $2 ~ /^[US]F_/ || $2 ~ /^TP_STATUS_/ || @@ -552,6 +564,7 @@ ccflags="$@" $2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ && $2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ || $2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ || + $2 ~ /^(CONNECT|SAE)_/ || $2 ~ /^FIORDCHK$/ || $2 ~ /^SIOC/ || $2 ~ /^TIOC/ || @@ -655,7 +668,7 @@ errors=$( signals=$( echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort ) @@ -665,7 +678,7 @@ echo '#include ' | $CC -x c - -E -dM $ccflags | sort >_error.grep echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort >_signal.grep echo '// mkerrors.sh' "$@" diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go index 67ce6ce..6f15ba1 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix.go @@ -360,7 +360,7 @@ func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, var status _C_int var r Pid_t err = ERESTART - // AIX wait4 may return with ERESTART errno, while the processus is still + // AIX wait4 may return with ERESTART errno, while the process is still // active. for err == ERESTART { r, err = wait4(Pid_t(pid), &status, options, rusage) diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 2d15200..099867d 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -566,6 +566,43 @@ func PthreadFchdir(fd int) (err error) { return pthread_fchdir_np(fd) } +// Connectx calls connectx(2) to initiate a connection on a socket. +// +// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument. +// +// - srcIf is the optional source interface index. 0 means unspecified. +// - srcAddr is the optional source address. nil means unspecified. +// - dstAddr is the destination address. +// +// On success, Connectx returns the number of bytes enqueued for transmission. +func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) { + endpoints := SaEndpoints{ + Srcif: srcIf, + } + + if srcAddr != nil { + addrp, addrlen, err := srcAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Srcaddr = (*RawSockaddr)(addrp) + endpoints.Srcaddrlen = uint32(addrlen) + } + + if dstAddr != nil { + addrp, addrlen, err := dstAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Dstaddr = (*RawSockaddr)(addrp) + endpoints.Dstaddrlen = uint32(addrlen) + } + + err = connectx(fd, &endpoints, associd, flags, iov, &n, connid) + return +} + +//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) //sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) //sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go index ba46651..a6a2d2f 100644 --- a/vendor/golang.org/x/sys/unix/syscall_hurd.go +++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go @@ -11,6 +11,7 @@ package unix int ioctl(int, unsigned long int, uintptr_t); */ import "C" +import "unsafe" func ioctl(fd int, req uint, arg uintptr) (err error) { r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg)) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 3f1d3d4..230a945 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -1295,6 +1295,48 @@ func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) { return &value, err } +// GetsockoptTCPCCVegasInfo returns algorithm specific congestion control information for a socket using the "vegas" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCVegasInfo(fd, level, opt int) (*TCPVegasInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPVegasInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCDCTCPInfo returns algorithm specific congestion control information for a socket using the "dctp" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCDCTCPInfo(fd, level, opt int) (*TCPDCTCPInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPDCTCPInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCBBRInfo returns algorithm specific congestion control information for a socket using the "bbr" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCBBRInfo(fd, level, opt int) (*TCPBBRInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPBBRInfo)(unsafe.Pointer(&value[0])) + return out, err +} + // GetsockoptString returns the string value of the socket option opt for the // socket associated with fd at the given socket level. func GetsockoptString(fd, level, opt int) (string, error) { @@ -1818,6 +1860,7 @@ func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err e //sys ClockAdjtime(clockid int32, buf *Timex) (state int, err error) //sys ClockGetres(clockid int32, res *Timespec) (err error) //sys ClockGettime(clockid int32, time *Timespec) (err error) +//sys ClockSettime(clockid int32, time *Timespec) (err error) //sys ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error) //sys Close(fd int) (err error) //sys CloseRange(first uint, last uint, flags uint) (err error) @@ -1959,7 +2002,26 @@ func Getpgrp() (pid int) { //sysnb Getpid() (pid int) //sysnb Getppid() (ppid int) //sys Getpriority(which int, who int) (prio int, err error) -//sys Getrandom(buf []byte, flags int) (n int, err error) + +func Getrandom(buf []byte, flags int) (n int, err error) { + vdsoRet, supported := vgetrandom(buf, uint32(flags)) + if supported { + if vdsoRet < 0 { + return 0, errnoErr(syscall.Errno(-vdsoRet)) + } + return vdsoRet, nil + } + var p *byte + if len(buf) > 0 { + p = &buf[0] + } + r, _, e := Syscall(SYS_GETRANDOM, uintptr(unsafe.Pointer(p)), uintptr(len(buf)), uintptr(flags)) + if e != 0 { + return 0, errnoErr(e) + } + return int(r), nil +} + //sysnb Getrusage(who int, rusage *Rusage) (err error) //sysnb Getsid(pid int) (sid int, err error) //sysnb Gettid() (tid int) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go index cf2ee6c..745e5c7 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go @@ -182,3 +182,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go index 3d0e984..dd2262a 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go @@ -214,3 +214,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go index 6f5a288..8cf3670 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go @@ -187,3 +187,5 @@ func RISCVHWProbe(pairs []RISCVHWProbePairs, set *CPUSet, flags uint) (err error } return riscvHWProbe(pairs, setSize, set, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go b/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go index 312ae6a..7bf5c04 100644 --- a/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go @@ -768,6 +768,15 @@ func Munmap(b []byte) (err error) { return mapper.Munmap(b) } +func MmapPtr(fd int, offset int64, addr unsafe.Pointer, length uintptr, prot int, flags int) (ret unsafe.Pointer, err error) { + xaddr, err := mapper.mmap(uintptr(addr), length, prot, flags, fd, offset) + return unsafe.Pointer(xaddr), err +} + +func MunmapPtr(addr unsafe.Pointer, length uintptr) (err error) { + return mapper.munmap(uintptr(addr), length) +} + //sys Gethostname(buf []byte) (err error) = SYS___GETHOSTNAME_A //sysnb Getgid() (gid int) //sysnb Getpid() (pid int) @@ -816,10 +825,10 @@ func Lstat(path string, stat *Stat_t) (err error) { // for checking symlinks begins with $VERSION/ $SYSNAME/ $SYSSYMR/ $SYSSYMA/ func isSpecialPath(path []byte) (v bool) { var special = [4][8]byte{ - [8]byte{'V', 'E', 'R', 'S', 'I', 'O', 'N', '/'}, - [8]byte{'S', 'Y', 'S', 'N', 'A', 'M', 'E', '/'}, - [8]byte{'S', 'Y', 'S', 'S', 'Y', 'M', 'R', '/'}, - [8]byte{'S', 'Y', 'S', 'S', 'Y', 'M', 'A', '/'}} + {'V', 'E', 'R', 'S', 'I', 'O', 'N', '/'}, + {'S', 'Y', 'S', 'N', 'A', 'M', 'E', '/'}, + {'S', 'Y', 'S', 'S', 'Y', 'M', 'R', '/'}, + {'S', 'Y', 'S', 'S', 'Y', 'M', 'A', '/'}} var i, j int for i = 0; i < len(special); i++ { @@ -3115,3 +3124,90 @@ func legacy_Mkfifoat(dirfd int, path string, mode uint32) (err error) { //sys Posix_openpt(oflag int) (fd int, err error) = SYS_POSIX_OPENPT //sys Grantpt(fildes int) (rc int, err error) = SYS_GRANTPT //sys Unlockpt(fildes int) (rc int, err error) = SYS_UNLOCKPT + +func fcntlAsIs(fd uintptr, cmd int, arg uintptr) (val int, err error) { + runtime.EnterSyscall() + r0, e2, e1 := CallLeFuncWithErr(GetZosLibVec()+SYS_FCNTL<<4, uintptr(fd), uintptr(cmd), arg) + runtime.ExitSyscall() + val = int(r0) + if int64(r0) == -1 { + err = errnoErr2(e1, e2) + } + return +} + +func Fcntl(fd uintptr, cmd int, op interface{}) (ret int, err error) { + switch op.(type) { + case *Flock_t: + err = FcntlFlock(fd, cmd, op.(*Flock_t)) + if err != nil { + ret = -1 + } + return + case int: + return FcntlInt(fd, cmd, op.(int)) + case *F_cnvrt: + return fcntlAsIs(fd, cmd, uintptr(unsafe.Pointer(op.(*F_cnvrt)))) + case unsafe.Pointer: + return fcntlAsIs(fd, cmd, uintptr(op.(unsafe.Pointer))) + default: + return -1, EINVAL + } + return +} + +func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { + if raceenabled { + raceReleaseMerge(unsafe.Pointer(&ioSync)) + } + return sendfile(outfd, infd, offset, count) +} + +func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { + // TODO: use LE call instead if the call is implemented + originalOffset, err := Seek(infd, 0, SEEK_CUR) + if err != nil { + return -1, err + } + //start reading data from in_fd + if offset != nil { + _, err := Seek(infd, *offset, SEEK_SET) + if err != nil { + return -1, err + } + } + + buf := make([]byte, count) + readBuf := make([]byte, 0) + var n int = 0 + for i := 0; i < count; i += n { + n, err := Read(infd, buf) + if n == 0 { + if err != nil { + return -1, err + } else { // EOF + break + } + } + readBuf = append(readBuf, buf...) + buf = buf[0:0] + } + + n2, err := Write(outfd, readBuf) + if err != nil { + return -1, err + } + + //When sendfile() returns, this variable will be set to the + // offset of the byte following the last byte that was read. + if offset != nil { + *offset = *offset + int64(n) + // If offset is not NULL, then sendfile() does not modify the file + // offset of in_fd + _, err := Seek(infd, originalOffset, SEEK_SET) + if err != nil { + return -1, err + } + } + return n2, nil +} diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_linux.go b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go new file mode 100644 index 0000000..07ac8e0 --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go @@ -0,0 +1,13 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && go1.24 + +package unix + +import _ "unsafe" + +//go:linkname vgetrandom runtime.vgetrandom +//go:noescape +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go new file mode 100644 index 0000000..297e97b --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux || !go1.24 + +package unix + +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) { + return -1, false +} diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go index 4308ac1..d73c465 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1265,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go index c8068a7..4a55a40 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1265,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go index 01a70b2..6ebc48b 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go @@ -321,6 +321,9 @@ const ( AUDIT_INTEGRITY_STATUS = 0x70a AUDIT_IPC = 0x517 AUDIT_IPC_SET_PERM = 0x51f + AUDIT_IPE_ACCESS = 0x58c + AUDIT_IPE_CONFIG_CHANGE = 0x58d + AUDIT_IPE_POLICY_LOAD = 0x58e AUDIT_KERNEL = 0x7d0 AUDIT_KERNEL_OTHER = 0x524 AUDIT_KERN_MODULE = 0x532 @@ -489,12 +492,14 @@ const ( BPF_F_ID = 0x20 BPF_F_NETFILTER_IP_DEFRAG = 0x1 BPF_F_QUERY_EFFECTIVE = 0x1 + BPF_F_REDIRECT_FLAGS = 0x19 BPF_F_REPLACE = 0x4 BPF_F_SLEEPABLE = 0x10 BPF_F_STRICT_ALIGNMENT = 0x1 BPF_F_TEST_REG_INVARIANTS = 0x80 BPF_F_TEST_RND_HI32 = 0x4 BPF_F_TEST_RUN_ON_CPU = 0x1 + BPF_F_TEST_SKB_CHECKSUM_COMPLETE = 0x4 BPF_F_TEST_STATE_FREQ = 0x8 BPF_F_TEST_XDP_LIVE_FRAMES = 0x2 BPF_F_XDP_DEV_BOUND_ONLY = 0x40 @@ -1165,6 +1170,7 @@ const ( EXTA = 0xe EXTB = 0xf F2FS_SUPER_MAGIC = 0xf2f52010 + FALLOC_FL_ALLOCATE_RANGE = 0x0 FALLOC_FL_COLLAPSE_RANGE = 0x8 FALLOC_FL_INSERT_RANGE = 0x20 FALLOC_FL_KEEP_SIZE = 0x1 @@ -1798,6 +1804,8 @@ const ( LANDLOCK_ACCESS_NET_BIND_TCP = 0x1 LANDLOCK_ACCESS_NET_CONNECT_TCP = 0x2 LANDLOCK_CREATE_RULESET_VERSION = 0x1 + LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET = 0x1 + LANDLOCK_SCOPE_SIGNAL = 0x2 LINUX_REBOOT_CMD_CAD_OFF = 0x0 LINUX_REBOOT_CMD_CAD_ON = 0x89abcdef LINUX_REBOOT_CMD_HALT = 0xcdef0123 @@ -1922,6 +1930,8 @@ const ( MNT_EXPIRE = 0x4 MNT_FORCE = 0x1 MNT_ID_REQ_SIZE_VER0 = 0x18 + MNT_ID_REQ_SIZE_VER1 = 0x20 + MNT_NS_INFO_SIZE_VER0 = 0x10 MODULE_INIT_COMPRESSED_FILE = 0x4 MODULE_INIT_IGNORE_MODVERSIONS = 0x1 MODULE_INIT_IGNORE_VERMAGIC = 0x2 @@ -2187,7 +2197,7 @@ const ( NFT_REG_SIZE = 0x10 NFT_REJECT_ICMPX_MAX = 0x3 NFT_RT_MAX = 0x4 - NFT_SECMARK_CTX_MAXLEN = 0x100 + NFT_SECMARK_CTX_MAXLEN = 0x1000 NFT_SET_MAXNAMELEN = 0x100 NFT_SOCKET_MAX = 0x3 NFT_TABLE_F_MASK = 0x7 @@ -2356,9 +2366,11 @@ const ( PERF_MEM_LVLNUM_IO = 0xa PERF_MEM_LVLNUM_L1 = 0x1 PERF_MEM_LVLNUM_L2 = 0x2 + PERF_MEM_LVLNUM_L2_MHB = 0x5 PERF_MEM_LVLNUM_L3 = 0x3 PERF_MEM_LVLNUM_L4 = 0x4 PERF_MEM_LVLNUM_LFB = 0xc + PERF_MEM_LVLNUM_MSC = 0x6 PERF_MEM_LVLNUM_NA = 0xf PERF_MEM_LVLNUM_PMEM = 0xe PERF_MEM_LVLNUM_RAM = 0xd @@ -2431,6 +2443,7 @@ const ( PRIO_PGRP = 0x1 PRIO_PROCESS = 0x0 PRIO_USER = 0x2 + PROCFS_IOCTL_MAGIC = 'f' PROC_SUPER_MAGIC = 0x9fa0 PROT_EXEC = 0x4 PROT_GROWSDOWN = 0x1000000 @@ -2620,6 +2633,28 @@ const ( PR_UNALIGN_NOPRINT = 0x1 PR_UNALIGN_SIGBUS = 0x2 PSTOREFS_MAGIC = 0x6165676c + PTP_CLK_MAGIC = '=' + PTP_ENABLE_FEATURE = 0x1 + PTP_EXTTS_EDGES = 0x6 + PTP_EXTTS_EVENT_VALID = 0x1 + PTP_EXTTS_V1_VALID_FLAGS = 0x7 + PTP_EXTTS_VALID_FLAGS = 0x1f + PTP_EXT_OFFSET = 0x10 + PTP_FALLING_EDGE = 0x4 + PTP_MAX_SAMPLES = 0x19 + PTP_PEROUT_DUTY_CYCLE = 0x2 + PTP_PEROUT_ONE_SHOT = 0x1 + PTP_PEROUT_PHASE = 0x4 + PTP_PEROUT_V1_VALID_FLAGS = 0x0 + PTP_PEROUT_VALID_FLAGS = 0x7 + PTP_PIN_GETFUNC = 0xc0603d06 + PTP_PIN_GETFUNC2 = 0xc0603d0f + PTP_RISING_EDGE = 0x2 + PTP_STRICT_FLAGS = 0x8 + PTP_SYS_OFFSET_EXTENDED = 0xc4c03d09 + PTP_SYS_OFFSET_EXTENDED2 = 0xc4c03d12 + PTP_SYS_OFFSET_PRECISE = 0xc0403d08 + PTP_SYS_OFFSET_PRECISE2 = 0xc0403d11 PTRACE_ATTACH = 0x10 PTRACE_CONT = 0x7 PTRACE_DETACH = 0x11 @@ -2933,15 +2968,17 @@ const ( RUSAGE_SELF = 0x0 RUSAGE_THREAD = 0x1 RWF_APPEND = 0x10 + RWF_ATOMIC = 0x40 RWF_DSYNC = 0x2 RWF_HIPRI = 0x1 RWF_NOAPPEND = 0x20 RWF_NOWAIT = 0x8 - RWF_SUPPORTED = 0x3f + RWF_SUPPORTED = 0x7f RWF_SYNC = 0x4 RWF_WRITE_LIFE_NOT_SET = 0x0 SCHED_BATCH = 0x3 SCHED_DEADLINE = 0x6 + SCHED_EXT = 0x7 SCHED_FIFO = 0x1 SCHED_FLAG_ALL = 0x7f SCHED_FLAG_DL_OVERRUN = 0x4 @@ -3210,6 +3247,7 @@ const ( STATX_ATTR_MOUNT_ROOT = 0x2000 STATX_ATTR_NODUMP = 0x40 STATX_ATTR_VERITY = 0x100000 + STATX_ATTR_WRITE_ATOMIC = 0x400000 STATX_BASIC_STATS = 0x7ff STATX_BLOCKS = 0x400 STATX_BTIME = 0x800 @@ -3226,6 +3264,7 @@ const ( STATX_SUBVOL = 0x8000 STATX_TYPE = 0x1 STATX_UID = 0x8 + STATX_WRITE_ATOMIC = 0x10000 STATX__RESERVED = 0x80000000 SYNC_FILE_RANGE_WAIT_AFTER = 0x4 SYNC_FILE_RANGE_WAIT_BEFORE = 0x1 @@ -3624,6 +3663,7 @@ const ( XDP_UMEM_PGOFF_COMPLETION_RING = 0x180000000 XDP_UMEM_PGOFF_FILL_RING = 0x100000000 XDP_UMEM_REG = 0x4 + XDP_UMEM_TX_METADATA_LEN = 0x4 XDP_UMEM_TX_SW_CSUM = 0x2 XDP_UMEM_UNALIGNED_CHUNK_FLAG = 0x1 XDP_USE_NEED_WAKEUP = 0x8 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go index 684a516..c0d45e3 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go @@ -109,6 +109,7 @@ const ( HIDIOCGRAWINFO = 0x80084803 HIDIOCGRDESC = 0x90044802 HIDIOCGRDESCSIZE = 0x80044801 + HIDIOCREVOKE = 0x4004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -153,9 +154,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 @@ -232,6 +238,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_GETFPREGS = 0xe PTRACE_GETFPXREGS = 0x12 PTRACE_GET_THREAD_AREA = 0x19 @@ -278,6 +298,8 @@ const ( RTC_WIE_ON = 0x700f RTC_WKALM_RD = 0x80287010 RTC_WKALM_SET = 0x4028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -316,6 +338,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go index 61d74b5..c731d24 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go @@ -109,6 +109,7 @@ const ( HIDIOCGRAWINFO = 0x80084803 HIDIOCGRDESC = 0x90044802 HIDIOCGRDESCSIZE = 0x80044801 + HIDIOCREVOKE = 0x4004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -153,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 @@ -232,6 +238,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_ARCH_PRCTL = 0x1e PTRACE_GETFPREGS = 0xe PTRACE_GETFPXREGS = 0x12 @@ -279,6 +299,8 @@ const ( RTC_WIE_ON = 0x700f RTC_WKALM_RD = 0x80287010 RTC_WKALM_SET = 0x4028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -317,6 +339,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go index a28c9e3..680018a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x80084803 HIDIOCGRDESC = 0x90044802 HIDIOCGRDESCSIZE = 0x80044801 + HIDIOCREVOKE = 0x4004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -150,9 +151,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 @@ -229,6 +235,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_GETCRUNCHREGS = 0x19 PTRACE_GETFDPIC = 0x1f PTRACE_GETFDPIC_EXEC = 0x0 @@ -284,6 +304,8 @@ const ( RTC_WIE_ON = 0x700f RTC_WKALM_RD = 0x80287010 RTC_WKALM_SET = 0x4028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -322,6 +344,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go index ab5d1fe..a63909f 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go @@ -112,6 +112,7 @@ const ( HIDIOCGRAWINFO = 0x80084803 HIDIOCGRDESC = 0x90044802 HIDIOCGRDESCSIZE = 0x80044801 + HIDIOCREVOKE = 0x4004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -154,9 +155,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 @@ -200,6 +206,7 @@ const ( PERF_EVENT_IOC_SET_BPF = 0x40042408 PERF_EVENT_IOC_SET_FILTER = 0x40082406 PERF_EVENT_IOC_SET_OUTPUT = 0x2405 + POE_MAGIC = 0x504f4530 PPPIOCATTACH = 0x4004743d PPPIOCATTCHAN = 0x40047438 PPPIOCBRIDGECHAN = 0x40047435 @@ -235,6 +242,20 @@ const ( PROT_BTI = 0x10 PROT_MTE = 0x20 PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_PEEKMTETAGS = 0x21 PTRACE_POKEMTETAGS = 0x22 PTRACE_SYSEMU = 0x1f @@ -275,6 +296,8 @@ const ( RTC_WIE_ON = 0x700f RTC_WKALM_RD = 0x80287010 RTC_WKALM_SET = 0x4028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -313,6 +336,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go index c523090..9b0a257 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go @@ -109,6 +109,7 @@ const ( HIDIOCGRAWINFO = 0x80084803 HIDIOCGRDESC = 0x90044802 HIDIOCGRDESCSIZE = 0x80044801 + HIDIOCREVOKE = 0x4004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -154,9 +155,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 @@ -233,6 +239,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_SYSEMU = 0x1f PTRACE_SYSEMU_SINGLESTEP = 0x20 RLIMIT_AS = 0x9 @@ -271,6 +291,8 @@ const ( RTC_WIE_ON = 0x700f RTC_WKALM_RD = 0x80287010 RTC_WKALM_SET = 0x4028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -309,6 +331,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go index 01e6ea7..958e6e0 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x100 @@ -150,9 +151,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 @@ -229,6 +235,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 @@ -277,6 +297,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -315,6 +337,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x1029 SO_DONTROUTE = 0x10 SO_ERROR = 0x1007 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go index 7aa610b..50c7f25 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x100 @@ -150,9 +151,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 @@ -229,6 +235,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 @@ -277,6 +297,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -315,6 +337,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x1029 SO_DONTROUTE = 0x10 SO_ERROR = 0x1007 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go index 92af771..ced21d6 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x100 @@ -150,9 +151,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 @@ -229,6 +235,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 @@ -277,6 +297,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -315,6 +337,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x1029 SO_DONTROUTE = 0x10 SO_ERROR = 0x1007 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go index b27ef5e..226c044 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x100 @@ -150,9 +151,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 @@ -229,6 +235,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 @@ -277,6 +297,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -315,6 +337,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x1029 SO_DONTROUTE = 0x10 SO_ERROR = 0x1007 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go index 237a2ce..3122737 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x4000 ICANON = 0x100 IEXTEN = 0x400 @@ -152,9 +153,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 @@ -232,6 +238,20 @@ const ( PPPIOCXFERUNIT = 0x2000744e PROT_SAO = 0x10 PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETEVRREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETREGS64 = 0x16 @@ -332,6 +352,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -370,6 +392,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go index 4a5c555..eb5d346 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x4000 ICANON = 0x100 IEXTEN = 0x400 @@ -152,9 +153,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 @@ -232,6 +238,20 @@ const ( PPPIOCXFERUNIT = 0x2000744e PROT_SAO = 0x10 PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETEVRREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETREGS64 = 0x16 @@ -336,6 +356,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -374,6 +396,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go index a02fb49..e921ebc 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x4000 ICANON = 0x100 IEXTEN = 0x400 @@ -152,9 +153,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 @@ -232,6 +238,20 @@ const ( PPPIOCXFERUNIT = 0x2000744e PROT_SAO = 0x10 PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETEVRREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETREGS64 = 0x16 @@ -336,6 +356,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -374,6 +396,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go index e26a7c6..38ba81c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x80084803 HIDIOCGRDESC = 0x90044802 HIDIOCGRDESCSIZE = 0x80044801 + HIDIOCREVOKE = 0x4004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -150,9 +151,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 @@ -229,6 +235,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_GETFDPIC = 0x21 PTRACE_GETFDPIC_EXEC = 0x0 PTRACE_GETFDPIC_INTERP = 0x1 @@ -268,6 +288,8 @@ const ( RTC_WIE_ON = 0x700f RTC_WKALM_RD = 0x80287010 RTC_WKALM_SET = 0x4028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -306,6 +328,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go index c48f7c2..71f0400 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go @@ -108,6 +108,7 @@ const ( HIDIOCGRAWINFO = 0x80084803 HIDIOCGRDESC = 0x90044802 HIDIOCGRDESCSIZE = 0x80044801 + HIDIOCREVOKE = 0x4004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -150,9 +151,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 @@ -229,6 +235,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_DISABLE_TE = 0x5010 PTRACE_ENABLE_TE = 0x5009 PTRACE_GET_LAST_BREAK = 0x5006 @@ -340,6 +360,8 @@ const ( RTC_WIE_ON = 0x700f RTC_WKALM_RD = 0x80287010 RTC_WKALM_SET = 0x4028700f + SCM_DEVMEM_DMABUF = 0x4f + SCM_DEVMEM_LINEAR = 0x4e SCM_TIMESTAMPING = 0x25 SCM_TIMESTAMPING_OPT_STATS = 0x36 SCM_TIMESTAMPING_PKTINFO = 0x3a @@ -378,6 +400,9 @@ const ( SO_CNX_ADVICE = 0x35 SO_COOKIE = 0x39 SO_DETACH_REUSEPORT_BPF = 0x44 + SO_DEVMEM_DMABUF = 0x4f + SO_DEVMEM_DONTNEED = 0x50 + SO_DEVMEM_LINEAR = 0x4e SO_DOMAIN = 0x27 SO_DONTROUTE = 0x5 SO_ERROR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go index ad4b9aa..c44a313 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go @@ -112,6 +112,7 @@ const ( HIDIOCGRAWINFO = 0x40084803 HIDIOCGRDESC = 0x50044802 HIDIOCGRDESCSIZE = 0x40044801 + HIDIOCREVOKE = 0x8004480d HUPCL = 0x400 ICANON = 0x2 IEXTEN = 0x8000 @@ -155,9 +156,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 @@ -234,6 +240,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPAREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETFPREGS64 = 0x19 @@ -331,6 +351,8 @@ const ( RTC_WIE_ON = 0x2000700f RTC_WKALM_RD = 0x40287010 RTC_WKALM_SET = 0x8028700f + SCM_DEVMEM_DMABUF = 0x58 + SCM_DEVMEM_LINEAR = 0x57 SCM_TIMESTAMPING = 0x23 SCM_TIMESTAMPING_OPT_STATS = 0x38 SCM_TIMESTAMPING_PKTINFO = 0x3c @@ -417,6 +439,9 @@ const ( SO_CNX_ADVICE = 0x37 SO_COOKIE = 0x3b SO_DETACH_REUSEPORT_BPF = 0x47 + SO_DEVMEM_DMABUF = 0x58 + SO_DEVMEM_DONTNEED = 0x59 + SO_DEVMEM_LINEAR = 0x57 SO_DOMAIN = 0x1029 SO_DONTROUTE = 0x10 SO_ERROR = 0x1007 diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go index da08b2a..1ec2b14 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go @@ -581,6 +581,8 @@ const ( AT_EMPTY_PATH = 0x1000 AT_REMOVEDIR = 0x200 RENAME_NOREPLACE = 1 << 0 + ST_RDONLY = 1 + ST_NOSUID = 2 ) const ( diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go index b622533..24b346e 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go @@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index cfe6646..ebd2131 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 13f624f..824b9c2 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index fe222b7..4f178a2 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index 1bc1a5a..5cc1e8e 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -592,6 +592,16 @@ func ClockGettime(clockid int32, time *Timespec) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ClockSettime(clockid int32, time *Timespec) (err error) { + _, _, e1 := Syscall(SYS_CLOCK_SETTIME, uintptr(clockid), uintptr(unsafe.Pointer(time)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error) { _, _, e1 := Syscall6(SYS_CLOCK_NANOSLEEP, uintptr(clockid), uintptr(flags), uintptr(unsafe.Pointer(request)), uintptr(unsafe.Pointer(remain)), 0, 0) if e1 != 0 { @@ -971,23 +981,6 @@ func Getpriority(which int, who int) (prio int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Getrandom(buf []byte, flags int) (n int, err error) { - var _p0 unsafe.Pointer - if len(buf) > 0 { - _p0 = unsafe.Pointer(&buf[0]) - } else { - _p0 = unsafe.Pointer(&_zero) - } - r0, _, e1 := Syscall(SYS_GETRANDOM, uintptr(_p0), uintptr(len(buf)), uintptr(flags)) - n = int(r0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Getrusage(who int, rusage *Rusage) (err error) { _, _, e1 := RawSyscall(SYS_GETRUSAGE, uintptr(who), uintptr(unsafe.Pointer(rusage)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go index d3e38f6..f485dbf 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go @@ -341,6 +341,7 @@ const ( SYS_STATX = 332 SYS_IO_PGETEVENTS = 333 SYS_RSEQ = 334 + SYS_URETPROBE = 335 SYS_PIDFD_SEND_SIGNAL = 424 SYS_IO_URING_SETUP = 425 SYS_IO_URING_ENTER = 426 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go index 6c778c2..1893e2f 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go @@ -85,7 +85,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go index 37281cf..16a4017 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go @@ -84,6 +84,8 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 + SYS_NEWFSTATAT = 79 + SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 SYS_FDATASYNC = 83 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go index 9889f6a..a5459e7 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go @@ -84,7 +84,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go index 091d107..17c53bd 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 @@ -449,11 +462,14 @@ type FdSet struct { const ( SizeofIfMsghdr = 0x70 + SizeofIfMsghdr2 = 0xa0 SizeofIfData = 0x60 + SizeofIfData64 = 0x80 SizeofIfaMsghdr = 0x14 SizeofIfmaMsghdr = 0x10 SizeofIfmaMsghdr2 = 0x14 SizeofRtMsghdr = 0x5c + SizeofRtMsghdr2 = 0x5c SizeofRtMetrics = 0x38 ) @@ -467,6 +483,20 @@ type IfMsghdr struct { Data IfData } +type IfMsghdr2 struct { + Msglen uint16 + Version uint8 + Type uint8 + Addrs int32 + Flags int32 + Index uint16 + Snd_len int32 + Snd_maxlen int32 + Snd_drops int32 + Timer int32 + Data IfData64 +} + type IfData struct { Type uint8 Typelen uint8 @@ -499,6 +529,34 @@ type IfData struct { Reserved2 uint32 } +type IfData64 struct { + Type uint8 + Typelen uint8 + Physical uint8 + Addrlen uint8 + Hdrlen uint8 + Recvquota uint8 + Xmitquota uint8 + Unused1 uint8 + Mtu uint32 + Metric uint32 + Baudrate uint64 + Ipackets uint64 + Ierrors uint64 + Opackets uint64 + Oerrors uint64 + Collisions uint64 + Ibytes uint64 + Obytes uint64 + Imcasts uint64 + Omcasts uint64 + Iqdrops uint64 + Noproto uint64 + Recvtiming uint32 + Xmittiming uint32 + Lastchange Timeval32 +} + type IfaMsghdr struct { Msglen uint16 Version uint8 @@ -544,6 +602,21 @@ type RtMsghdr struct { Rmx RtMetrics } +type RtMsghdr2 struct { + Msglen uint16 + Version uint8 + Type uint8 + Index uint16 + Flags int32 + Addrs int32 + Refcnt int32 + Parentflags int32 + Reserved int32 + Use int32 + Inits uint32 + Rmx RtMetrics +} + type RtMetrics struct { Locks uint32 Mtu uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go index 28ff4ef..2392226 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 @@ -449,11 +462,14 @@ type FdSet struct { const ( SizeofIfMsghdr = 0x70 + SizeofIfMsghdr2 = 0xa0 SizeofIfData = 0x60 + SizeofIfData64 = 0x80 SizeofIfaMsghdr = 0x14 SizeofIfmaMsghdr = 0x10 SizeofIfmaMsghdr2 = 0x14 SizeofRtMsghdr = 0x5c + SizeofRtMsghdr2 = 0x5c SizeofRtMetrics = 0x38 ) @@ -467,6 +483,20 @@ type IfMsghdr struct { Data IfData } +type IfMsghdr2 struct { + Msglen uint16 + Version uint8 + Type uint8 + Addrs int32 + Flags int32 + Index uint16 + Snd_len int32 + Snd_maxlen int32 + Snd_drops int32 + Timer int32 + Data IfData64 +} + type IfData struct { Type uint8 Typelen uint8 @@ -499,6 +529,34 @@ type IfData struct { Reserved2 uint32 } +type IfData64 struct { + Type uint8 + Typelen uint8 + Physical uint8 + Addrlen uint8 + Hdrlen uint8 + Recvquota uint8 + Xmitquota uint8 + Unused1 uint8 + Mtu uint32 + Metric uint32 + Baudrate uint64 + Ipackets uint64 + Ierrors uint64 + Opackets uint64 + Oerrors uint64 + Collisions uint64 + Ibytes uint64 + Obytes uint64 + Imcasts uint64 + Omcasts uint64 + Iqdrops uint64 + Noproto uint64 + Recvtiming uint32 + Xmittiming uint32 + Lastchange Timeval32 +} + type IfaMsghdr struct { Msglen uint16 Version uint8 @@ -544,6 +602,21 @@ type RtMsghdr struct { Rmx RtMetrics } +type RtMsghdr2 struct { + Msglen uint16 + Version uint8 + Type uint8 + Index uint16 + Flags int32 + Addrs int32 + Refcnt int32 + Parentflags int32 + Reserved int32 + Use int32 + Inits uint32 + Rmx RtMetrics +} + type RtMetrics struct { Locks uint32 Mtu uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go index 6cbd094..51e13eb 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go @@ -625,6 +625,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go index 7c03b6e..d002d8e 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go @@ -630,6 +630,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go index 422107e..3f863d8 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go @@ -616,6 +616,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go index 505a12a..61c7293 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go @@ -610,6 +610,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go index cc986c7..b5d1741 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go @@ -612,6 +612,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index 7f1961b..5537148 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -87,31 +87,35 @@ type StatxTimestamp struct { } type Statx_t struct { - Mask uint32 - Blksize uint32 - Attributes uint64 - Nlink uint32 - Uid uint32 - Gid uint32 - Mode uint16 - _ [1]uint16 - Ino uint64 - Size uint64 - Blocks uint64 - Attributes_mask uint64 - Atime StatxTimestamp - Btime StatxTimestamp - Ctime StatxTimestamp - Mtime StatxTimestamp - Rdev_major uint32 - Rdev_minor uint32 - Dev_major uint32 - Dev_minor uint32 - Mnt_id uint64 - Dio_mem_align uint32 - Dio_offset_align uint32 - Subvol uint64 - _ [11]uint64 + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + Uid uint32 + Gid uint32 + Mode uint16 + _ [1]uint16 + Ino uint64 + Size uint64 + Blocks uint64 + Attributes_mask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + Rdev_major uint32 + Rdev_minor uint32 + Dev_major uint32 + Dev_minor uint32 + Mnt_id uint64 + Dio_mem_align uint32 + Dio_offset_align uint32 + Subvol uint64 + Atomic_write_unit_min uint32 + Atomic_write_unit_max uint32 + Atomic_write_segments_max uint32 + _ [1]uint32 + _ [9]uint64 } type Fsid struct { @@ -516,6 +520,29 @@ type TCPInfo struct { Total_rto_time uint32 } +type TCPVegasInfo struct { + Enabled uint32 + Rttcnt uint32 + Rtt uint32 + Minrtt uint32 +} + +type TCPDCTCPInfo struct { + Enabled uint16 + Ce_state uint16 + Alpha uint32 + Ab_ecn uint32 + Ab_tot uint32 +} + +type TCPBBRInfo struct { + Bw_lo uint32 + Bw_hi uint32 + Min_rtt uint32 + Pacing_gain uint32 + Cwnd_gain uint32 +} + type CanFilter struct { Id uint32 Mask uint32 @@ -557,6 +584,7 @@ const ( SizeofICMPv6Filter = 0x20 SizeofUcred = 0xc SizeofTCPInfo = 0xf8 + SizeofTCPCCInfo = 0x14 SizeofCanFilter = 0x8 SizeofTCPRepairOpt = 0x8 ) @@ -1724,12 +1752,6 @@ const ( IFLA_IPVLAN_UNSPEC = 0x0 IFLA_IPVLAN_MODE = 0x1 IFLA_IPVLAN_FLAGS = 0x2 - NETKIT_NEXT = -0x1 - NETKIT_PASS = 0x0 - NETKIT_DROP = 0x2 - NETKIT_REDIRECT = 0x7 - NETKIT_L2 = 0x0 - NETKIT_L3 = 0x1 IFLA_NETKIT_UNSPEC = 0x0 IFLA_NETKIT_PEER_INFO = 0x1 IFLA_NETKIT_PRIMARY = 0x2 @@ -1768,6 +1790,7 @@ const ( IFLA_VXLAN_DF = 0x1d IFLA_VXLAN_VNIFILTER = 0x1e IFLA_VXLAN_LOCALBYPASS = 0x1f + IFLA_VXLAN_LABEL_POLICY = 0x20 IFLA_GENEVE_UNSPEC = 0x0 IFLA_GENEVE_ID = 0x1 IFLA_GENEVE_REMOTE = 0x2 @@ -1797,6 +1820,8 @@ const ( IFLA_GTP_ROLE = 0x4 IFLA_GTP_CREATE_SOCKETS = 0x5 IFLA_GTP_RESTART_COUNT = 0x6 + IFLA_GTP_LOCAL = 0x7 + IFLA_GTP_LOCAL6 = 0x8 IFLA_BOND_UNSPEC = 0x0 IFLA_BOND_MODE = 0x1 IFLA_BOND_ACTIVE_SLAVE = 0x2 @@ -1829,6 +1854,7 @@ const ( IFLA_BOND_AD_LACP_ACTIVE = 0x1d IFLA_BOND_MISSED_MAX = 0x1e IFLA_BOND_NS_IP6_TARGET = 0x1f + IFLA_BOND_COUPLED_CONTROL = 0x20 IFLA_BOND_AD_INFO_UNSPEC = 0x0 IFLA_BOND_AD_INFO_AGGREGATOR = 0x1 IFLA_BOND_AD_INFO_NUM_PORTS = 0x2 @@ -1897,6 +1923,7 @@ const ( IFLA_HSR_SEQ_NR = 0x5 IFLA_HSR_VERSION = 0x6 IFLA_HSR_PROTOCOL = 0x7 + IFLA_HSR_INTERLINK = 0x8 IFLA_STATS_UNSPEC = 0x0 IFLA_STATS_LINK_64 = 0x1 IFLA_STATS_LINK_XSTATS = 0x2 @@ -1949,6 +1976,15 @@ const ( IFLA_DSA_MASTER = 0x1 ) +const ( + NETKIT_NEXT = -0x1 + NETKIT_PASS = 0x0 + NETKIT_DROP = 0x2 + NETKIT_REDIRECT = 0x7 + NETKIT_L2 = 0x0 + NETKIT_L3 = 0x1 +) + const ( NF_INET_PRE_ROUTING = 0x0 NF_INET_LOCAL_IN = 0x1 @@ -2486,7 +2522,7 @@ type XDPMmapOffsets struct { type XDPUmemReg struct { Addr uint64 Len uint64 - Chunk_size uint32 + Size uint32 Headroom uint32 Flags uint32 Tx_metadata_len uint32 @@ -2558,8 +2594,8 @@ const ( SOF_TIMESTAMPING_BIND_PHC = 0x8000 SOF_TIMESTAMPING_OPT_ID_TCP = 0x10000 - SOF_TIMESTAMPING_LAST = 0x10000 - SOF_TIMESTAMPING_MASK = 0x1ffff + SOF_TIMESTAMPING_LAST = 0x20000 + SOF_TIMESTAMPING_MASK = 0x3ffff SCM_TSTAMP_SND = 0x0 SCM_TSTAMP_SCHED = 0x1 @@ -3505,7 +3541,7 @@ type Nhmsg struct { type NexthopGrp struct { Id uint32 Weight uint8 - Resvd1 uint8 + High uint8 Resvd2 uint16 } @@ -3766,7 +3802,7 @@ const ( ETHTOOL_MSG_PSE_GET = 0x24 ETHTOOL_MSG_PSE_SET = 0x25 ETHTOOL_MSG_RSS_GET = 0x26 - ETHTOOL_MSG_USER_MAX = 0x2b + ETHTOOL_MSG_USER_MAX = 0x2d ETHTOOL_MSG_KERNEL_NONE = 0x0 ETHTOOL_MSG_STRSET_GET_REPLY = 0x1 ETHTOOL_MSG_LINKINFO_GET_REPLY = 0x2 @@ -3806,7 +3842,7 @@ const ( ETHTOOL_MSG_MODULE_NTF = 0x24 ETHTOOL_MSG_PSE_GET_REPLY = 0x25 ETHTOOL_MSG_RSS_GET_REPLY = 0x26 - ETHTOOL_MSG_KERNEL_MAX = 0x2b + ETHTOOL_MSG_KERNEL_MAX = 0x2e ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 ETHTOOL_FLAG_OMIT_REPLY = 0x2 ETHTOOL_FLAG_STATS = 0x4 @@ -3814,7 +3850,7 @@ const ( ETHTOOL_A_HEADER_DEV_INDEX = 0x1 ETHTOOL_A_HEADER_DEV_NAME = 0x2 ETHTOOL_A_HEADER_FLAGS = 0x3 - ETHTOOL_A_HEADER_MAX = 0x3 + ETHTOOL_A_HEADER_MAX = 0x4 ETHTOOL_A_BITSET_BIT_UNSPEC = 0x0 ETHTOOL_A_BITSET_BIT_INDEX = 0x1 ETHTOOL_A_BITSET_BIT_NAME = 0x2 @@ -3951,7 +3987,7 @@ const ( ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL = 0x17 ETHTOOL_A_COALESCE_USE_CQE_MODE_TX = 0x18 ETHTOOL_A_COALESCE_USE_CQE_MODE_RX = 0x19 - ETHTOOL_A_COALESCE_MAX = 0x1c + ETHTOOL_A_COALESCE_MAX = 0x1e ETHTOOL_A_PAUSE_UNSPEC = 0x0 ETHTOOL_A_PAUSE_HEADER = 0x1 ETHTOOL_A_PAUSE_AUTONEG = 0x2 @@ -3995,11 +4031,11 @@ const ( ETHTOOL_A_CABLE_RESULT_UNSPEC = 0x0 ETHTOOL_A_CABLE_RESULT_PAIR = 0x1 ETHTOOL_A_CABLE_RESULT_CODE = 0x2 - ETHTOOL_A_CABLE_RESULT_MAX = 0x2 + ETHTOOL_A_CABLE_RESULT_MAX = 0x3 ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC = 0x0 ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR = 0x1 ETHTOOL_A_CABLE_FAULT_LENGTH_CM = 0x2 - ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = 0x2 + ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = 0x3 ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC = 0x0 ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED = 0x1 ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED = 0x2 @@ -4082,6 +4118,107 @@ type EthtoolDrvinfo struct { Regdump_len uint32 } +type EthtoolTsInfo struct { + Cmd uint32 + So_timestamping uint32 + Phc_index int32 + Tx_types uint32 + Tx_reserved [3]uint32 + Rx_filters uint32 + Rx_reserved [3]uint32 +} + +type HwTstampConfig struct { + Flags int32 + Tx_type int32 + Rx_filter int32 +} + +const ( + HWTSTAMP_FILTER_NONE = 0x0 + HWTSTAMP_FILTER_ALL = 0x1 + HWTSTAMP_FILTER_SOME = 0x2 + HWTSTAMP_FILTER_PTP_V1_L4_EVENT = 0x3 + HWTSTAMP_FILTER_PTP_V2_L4_EVENT = 0x6 + HWTSTAMP_FILTER_PTP_V2_L2_EVENT = 0x9 + HWTSTAMP_FILTER_PTP_V2_EVENT = 0xc +) + +const ( + HWTSTAMP_TX_OFF = 0x0 + HWTSTAMP_TX_ON = 0x1 + HWTSTAMP_TX_ONESTEP_SYNC = 0x2 +) + +type ( + PtpClockCaps struct { + Max_adj int32 + N_alarm int32 + N_ext_ts int32 + N_per_out int32 + Pps int32 + N_pins int32 + Cross_timestamping int32 + Adjust_phase int32 + Max_phase_adj int32 + Rsv [11]int32 + } + PtpClockTime struct { + Sec int64 + Nsec uint32 + Reserved uint32 + } + PtpExttsEvent struct { + T PtpClockTime + Index uint32 + Flags uint32 + Rsv [2]uint32 + } + PtpExttsRequest struct { + Index uint32 + Flags uint32 + Rsv [2]uint32 + } + PtpPeroutRequest struct { + StartOrPhase PtpClockTime + Period PtpClockTime + Index uint32 + Flags uint32 + On PtpClockTime + } + PtpPinDesc struct { + Name [64]byte + Index uint32 + Func uint32 + Chan uint32 + Rsv [5]uint32 + } + PtpSysOffset struct { + Samples uint32 + Rsv [3]uint32 + Ts [51]PtpClockTime + } + PtpSysOffsetExtended struct { + Samples uint32 + Clockid int32 + Rsv [2]uint32 + Ts [25][3]PtpClockTime + } + PtpSysOffsetPrecise struct { + Device PtpClockTime + Realtime PtpClockTime + Monoraw PtpClockTime + Rsv [4]uint32 + } +) + +const ( + PTP_PF_NONE = 0x0 + PTP_PF_EXTTS = 0x1 + PTP_PF_PEROUT = 0x2 + PTP_PF_PHYSYNC = 0x3 +) + type ( HIDRawReportDescriptor struct { Size uint32 @@ -4263,6 +4400,7 @@ const ( type LandlockRulesetAttr struct { Access_fs uint64 Access_net uint64 + Scoped uint64 } type LandlockPathBeneathAttr struct { @@ -4609,7 +4747,7 @@ const ( NL80211_ATTR_MAC_HINT = 0xc8 NL80211_ATTR_MAC_MASK = 0xd7 NL80211_ATTR_MAX_AP_ASSOC_STA = 0xca - NL80211_ATTR_MAX = 0x14a + NL80211_ATTR_MAX = 0x14c NL80211_ATTR_MAX_CRIT_PROT_DURATION = 0xb4 NL80211_ATTR_MAX_CSA_COUNTERS = 0xce NL80211_ATTR_MAX_MATCH_SETS = 0x85 @@ -5213,7 +5351,7 @@ const ( NL80211_FREQUENCY_ATTR_GO_CONCURRENT = 0xf NL80211_FREQUENCY_ATTR_INDOOR_ONLY = 0xe NL80211_FREQUENCY_ATTR_IR_CONCURRENT = 0xf - NL80211_FREQUENCY_ATTR_MAX = 0x20 + NL80211_FREQUENCY_ATTR_MAX = 0x21 NL80211_FREQUENCY_ATTR_MAX_TX_POWER = 0x6 NL80211_FREQUENCY_ATTR_NO_10MHZ = 0x11 NL80211_FREQUENCY_ATTR_NO_160MHZ = 0xc diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index 15adc04..ad05b51 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -727,6 +727,37 @@ const ( RISCV_HWPROBE_EXT_ZBA = 0x8 RISCV_HWPROBE_EXT_ZBB = 0x10 RISCV_HWPROBE_EXT_ZBS = 0x20 + RISCV_HWPROBE_EXT_ZICBOZ = 0x40 + RISCV_HWPROBE_EXT_ZBC = 0x80 + RISCV_HWPROBE_EXT_ZBKB = 0x100 + RISCV_HWPROBE_EXT_ZBKC = 0x200 + RISCV_HWPROBE_EXT_ZBKX = 0x400 + RISCV_HWPROBE_EXT_ZKND = 0x800 + RISCV_HWPROBE_EXT_ZKNE = 0x1000 + RISCV_HWPROBE_EXT_ZKNH = 0x2000 + RISCV_HWPROBE_EXT_ZKSED = 0x4000 + RISCV_HWPROBE_EXT_ZKSH = 0x8000 + RISCV_HWPROBE_EXT_ZKT = 0x10000 + RISCV_HWPROBE_EXT_ZVBB = 0x20000 + RISCV_HWPROBE_EXT_ZVBC = 0x40000 + RISCV_HWPROBE_EXT_ZVKB = 0x80000 + RISCV_HWPROBE_EXT_ZVKG = 0x100000 + RISCV_HWPROBE_EXT_ZVKNED = 0x200000 + RISCV_HWPROBE_EXT_ZVKNHA = 0x400000 + RISCV_HWPROBE_EXT_ZVKNHB = 0x800000 + RISCV_HWPROBE_EXT_ZVKSED = 0x1000000 + RISCV_HWPROBE_EXT_ZVKSH = 0x2000000 + RISCV_HWPROBE_EXT_ZVKT = 0x4000000 + RISCV_HWPROBE_EXT_ZFH = 0x8000000 + RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000 + RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000 + RISCV_HWPROBE_EXT_ZVFH = 0x40000000 + RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000 + RISCV_HWPROBE_EXT_ZFA = 0x100000000 + RISCV_HWPROBE_EXT_ZTSO = 0x200000000 + RISCV_HWPROBE_EXT_ZACAS = 0x400000000 + RISCV_HWPROBE_EXT_ZICOND = 0x800000000 + RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000 RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5 RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0 RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1 @@ -734,4 +765,6 @@ const ( RISCV_HWPROBE_MISALIGNED_FAST = 0x3 RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4 RISCV_HWPROBE_MISALIGNED_MASK = 0x7 + RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6 + RISCV_HWPROBE_WHICH_CPUS = 0x1 ) diff --git a/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go b/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go index d9a13af..2e5d5a4 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go @@ -377,6 +377,12 @@ type Flock_t struct { Pid int32 } +type F_cnvrt struct { + Cvtcmd int32 + Pccsid int16 + Fccsid int16 +} + type Termios struct { Cflag uint32 Iflag uint32 diff --git a/vendor/modules.txt b/vendor/modules.txt index 65efc59..8cb6bb9 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -2,7 +2,7 @@ ## explicit; go 1.21 github.com/SovereignCloudStack/cluster-stack-operator/pkg/kubernetesversion github.com/SovereignCloudStack/cluster-stack-operator/pkg/version -# github.com/SovereignCloudStack/csctl v0.0.3 +# github.com/SovereignCloudStack/csctl v0.0.4 ## explicit; go 1.21 github.com/SovereignCloudStack/csctl/pkg/clusterstack github.com/SovereignCloudStack/csctl/pkg/hash @@ -15,7 +15,7 @@ github.com/fatih/color # github.com/go-ini/ini v1.67.0 ## explicit github.com/go-ini/ini -# github.com/goccy/go-json v0.10.3 +# github.com/goccy/go-json v0.10.4 ## explicit; go 1.19 github.com/goccy/go-json github.com/goccy/go-json/internal/decoder @@ -39,7 +39,7 @@ github.com/goccy/go-yaml/token # github.com/google/uuid v1.6.0 ## explicit github.com/google/uuid -# github.com/gophercloud/gophercloud v1.14.0 +# github.com/gophercloud/gophercloud v1.14.1 ## explicit; go 1.14 github.com/gophercloud/gophercloud github.com/gophercloud/gophercloud/openstack/imageservice/v2/images @@ -48,12 +48,12 @@ github.com/gophercloud/gophercloud/pagination # github.com/inconshreveable/mousetrap v1.1.0 ## explicit; go 1.18 github.com/inconshreveable/mousetrap -# github.com/klauspost/compress v1.17.9 -## explicit; go 1.20 +# github.com/klauspost/compress v1.17.11 +## explicit; go 1.21 github.com/klauspost/compress/internal/race github.com/klauspost/compress/s2 -# github.com/klauspost/cpuid/v2 v2.2.8 -## explicit; go 1.15 +# github.com/klauspost/cpuid/v2 v2.2.9 +## explicit; go 1.20 github.com/klauspost/cpuid/v2 # github.com/kr/pretty v0.3.1 ## explicit; go 1.12 @@ -66,8 +66,8 @@ github.com/mattn/go-isatty # github.com/minio/md5-simd v1.1.2 ## explicit; go 1.14 github.com/minio/md5-simd -# github.com/minio/minio-go/v7 v7.0.76 -## explicit; go 1.21 +# github.com/minio/minio-go/v7 v7.0.83 +## explicit; go 1.22 github.com/minio/minio-go/v7 github.com/minio/minio-go/v7/pkg/cors github.com/minio/minio-go/v7/pkg/credentials @@ -89,23 +89,23 @@ github.com/spf13/cobra # github.com/spf13/pflag v1.0.5 ## explicit; go 1.12 github.com/spf13/pflag -# golang.org/x/crypto v0.26.0 +# golang.org/x/crypto v0.31.0 ## explicit; go 1.20 golang.org/x/crypto/argon2 golang.org/x/crypto/blake2b # golang.org/x/mod v0.17.0 ## explicit; go 1.18 golang.org/x/mod/sumdb/dirhash -# golang.org/x/net v0.28.0 +# golang.org/x/net v0.33.0 ## explicit; go 1.18 golang.org/x/net/http/httpguts golang.org/x/net/idna golang.org/x/net/publicsuffix -# golang.org/x/sys v0.24.0 +# golang.org/x/sys v0.28.0 ## explicit; go 1.18 golang.org/x/sys/cpu golang.org/x/sys/unix -# golang.org/x/text v0.17.0 +# golang.org/x/text v0.21.0 ## explicit; go 1.18 golang.org/x/text/secure/bidirule golang.org/x/text/transform