From deedf077c850fa025b26ce8392582cfcf21f820b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 03:45:19 +0200 Subject: [PATCH 01/31] specs --- spec/lz4_spec.cr | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/spec/lz4_spec.cr b/spec/lz4_spec.cr index a64f174..6d9af08 100644 --- a/spec/lz4_spec.cr +++ b/spec/lz4_spec.cr @@ -1,4 +1,85 @@ require "./spec_helper" describe Compress::LZ4 do + it "can compress" do + input = IO::Memory.new("foobar" * 100000) + output = IO::Memory.new + Compress::LZ4::Writer.open(output) do |lz4| + IO.copy(input, lz4) + end + output.bytesize.should be < input.bytesize + end + + it "can decompress" do + src_str = "foobar" * 100000 + input = IO::Memory.new(src_str) + compressed = IO::Memory.new + Compress::LZ4::Writer.open(compressed) do |lz4| + cnt = IO.copy(input, lz4) + puts "wrote #{cnt} bytes" + end + compressed.rewind + + output = IO::Memory.new + Compress::LZ4::Reader.open(compressed) do |lz4| + cnt = IO.copy(lz4, output) + puts "read #{cnt} bytes" + end + str = output.to_s + str.bytesize.should eq src_str.bytesize + str.should eq src_str + end + + it "can decompress small parts" do + input = IO::Memory.new("foobar" * 100000) + output = IO::Memory.new + Compress::LZ4::Writer.open(output) do |lz4| + IO.copy(input, lz4) + end + output.rewind + reader = Compress::LZ4::Reader.new(output) + reader.read_string(6).should eq "foobar" + reader.close + end + + it "can stream large amounts" do + src = "a" * 1024**2 + output = IO::Memory.new + writer = Compress::LZ4::Writer.new(output) + writer.write src.to_slice + output.rewind + reader = Compress::LZ4::Reader.new(output) + dst = Bytes.new(1024**2) + read_count = reader.read(dst) + read_count.should eq 1024**2 + reader.close + end + + it "can rewind" do + src = "a" * 1024**2 + output = IO::Memory.new + writer = Compress::LZ4::Writer.new(output) + writer.write src.to_slice + output.rewind + reader = 
Compress::LZ4::Reader.new(output) + dst = Bytes.new(1024**2) + read_count = reader.read(dst) + read_count.should eq 1024**2 + reader.rewind + read_count = reader.read(dst) + read_count.should eq 1024**2 + reader.close + end + + it "should raise if not fully decompressed on close" do + src = "a" * 1024**2 + output = IO::Memory.new + writer = Compress::LZ4::Writer.new(output) + writer.write src.to_slice + output.rewind + reader = Compress::LZ4::Reader.new(output) + dst = Bytes.new(1024) + read_count = reader.read(dst) + reader.close + end end From 90c052c345d8760e0c5f84ee709a512bfd3cbec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 03:45:26 +0200 Subject: [PATCH 02/31] Link attribute will automatically use pkg-config --- src/lz4/lib.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lz4/lib.cr b/src/lz4/lib.cr index 84e4a6d..c5cdca3 100644 --- a/src/lz4/lib.cr +++ b/src/lz4/lib.cr @@ -1,5 +1,5 @@ module Compress::LZ4 - @[Link(ldflags: "`command -v pkg-config > /dev/null && pkg-config --libs liblz4 2> /dev/null|| printf %s '--llz4'`")] + @[Link("lz4")] lib LibLZ4 alias ErrorCodeT = LibC::SizeT alias Uint32T = LibC::UInt From 0e9a73af58b7233d780adaba89e41675bfdc04ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 03:46:24 +0200 Subject: [PATCH 03/31] refactor reader --- src/lz4/reader.cr | 98 +++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 55 deletions(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index e18bf36..55905f0 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -15,27 +15,15 @@ # pp string # ``` class Compress::LZ4::Reader < IO - include IO::Buffered - - # If `#sync_close?` is `true`, closing this IO will close the underlying IO. property? sync_close : Bool - - # Returns `true` if this reader is closed. getter? 
closed = false - @context : LibLZ4::Dctx - # buffer size that avoids execessive round-trips between C and Crystal but doesn't waste too much - # memory on buffering. Its arbitrarily chosen. - BUF_SIZE = 64 * 1024 - - # Creates an instance of LZ4::Reader. - def initialize(@io : IO, @sync_close : Bool = false) - @buffer = Bytes.new(BUF_SIZE) - @chunk = Bytes.empty - + def initialize(@io : IO, @sync_close = false) ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) - raise LZ4Error.new("Unable to create lz4 decoder instance: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 + raise_if_error(ret, "Failed to create decompression context") + @buffer = Bytes.new(DEFAULT_BUFFER_SIZE) + @chunk = Bytes.empty end # Creates a new reader from the given *io*, yields it to the given block, @@ -65,65 +53,65 @@ class Compress::LZ4::Reader < IO end # Always raises `IO::Error` because this is a read-only `IO`. - def unbuffered_write(slice : Bytes) + def write(slice : Bytes) : Nil raise IO::Error.new "Can't write to LZ4::Reader" end - def unbuffered_read(slice : Bytes) + def read(slice : Bytes) : Int check_open - return 0 if slice.empty? - if @chunk.empty? - m = @io.read(@buffer) - return m if m == 0 - @chunk = @buffer[0, m] - end + refill_buffer if @chunk.empty? + opts = LibLZ4::DecompressOptionsT.new(stable_dst: 1) + decompressed_bytes = 0 loop do - in_remaining = @chunk.size.to_u64 - out_remaining = slice.size.to_u64 - - in_ptr = @chunk.to_unsafe - out_ptr = slice.to_unsafe - - ret = LibLZ4.decompress(@context, out_ptr, pointerof(out_remaining), in_ptr, pointerof(in_remaining), nil) - raise LZ4Error.new("lz4 decompression error: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 - - @chunk = @chunk[in_remaining..] 
- return out_remaining if ret == 0 - - if out_remaining == 0 - # Probably ran out of data and buffer needs a refill - enc_n = @io.read(@buffer) - return 0 if enc_n == 0 - @chunk = @buffer[0, enc_n] - next - end - - return out_remaining + src_remaining = @chunk.size.to_u64 + dst_remaining = slice.size.to_u64 + + ret = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @chunk, pointerof(src_remaining), pointerof(opts)) + raise_if_error(ret, "Failed to decompress") + + @chunk = @chunk + src_remaining + slice = slice + dst_remaining + decompressed_bytes += dst_remaining + break if slice.empty? # got all we needed + break if dst_remaining.zero? # didn't progress + STDERR.puts "hint=#{ret}" + refill_buffer if ret > 0 # ret is a hint of how much more src data is needed end - 0 + decompressed_bytes end - def unbuffered_flush + def flush raise IO::Error.new "Can't flush LZ4::Reader" end - # Closes this reader. - def unbuffered_close - return if @closed || @context.nil? + def close + check_open @closed = true - - LibLZ4.free_decompression_context(@context) @io.close if @sync_close end - def unbuffered_rewind - check_open + def finalize + LibLZ4.free_decompression_context(@context) + end + def rewind @io.rewind - initialize(@io, @sync_close) + LibLZ4.reset_decompression_context(@context) + end + + private def refill_buffer + cnt = @io.read(@buffer) + STDERR.puts "refilling buffer, got=#{cnt}" + @chunk = @buffer[0, cnt] + end + + private def raise_if_error(ret : Int, msg : String) + if LibLZ4.is_error(ret) != 0 + raise LZ4Error.new("#{msg}: #{String.new(LibLZ4.get_error_name(ret))}") + end end # :nodoc: From 91564d2176e974f09920a1a219b27b103c8a7f1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 03:46:46 +0200 Subject: [PATCH 04/31] refactor writer --- src/lz4/writer.cr | 107 ++++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index 
cecaa8c..1973109 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -1,3 +1,5 @@ +require "./lib" + # A write-only `IO` object to compress data in the LZ4 format. # # Instances of this class wrap another `IO` object. When you write to this @@ -22,22 +24,25 @@ # end # ``` class Compress::LZ4::Writer < IO - # If `#sync_close?` is `true`, closing this IO will close the underlying IO. property? sync_close : Bool + getter? closed = false @context : LibLZ4::Cctx - CHUNK_SIZE = 64 * 1024 @pref : LibLZ4::PreferencesT + @header_written = false - def initialize(@output : IO, options : CompressOptions = WriterOptions.default, @sync_close : Bool = false) + def initialize(@output : IO, options = CompressOptions.new, @sync_close = false) ret = LibLZ4.create_compression_context(out @context, LibLZ4::VERSION) - raise LZ4Error.new("Unable to create lz4 encoder instance: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 - + raise_if_error(ret, "Failed to create compression context") @pref = options.to_preferences - buf_size = LibLZ4.compress_frame_bound(CHUNK_SIZE, pointerof(@pref)) - @buffer = Bytes.new(buf_size) - - @header_written = false - @closed = false + @block_size = case options.block_size + in BlockSize::Default then 64 * 1024 + in BlockSize::Max64Kb then 64 * 1024 + in BlockSize::Max256Kb then 256 * 1024 + in BlockSize::Max1Mb then 1024 * 1024 + in BlockSize::Max4Mb then 4 * 1024 * 1024 + end + buffer_size = LibLZ4.compress_frame_bound(@block_size, pointerof(@pref)) + @buffer = Bytes.new(buffer_size) end # Creates a new writer to the given *filename*. @@ -48,7 +53,7 @@ class Compress::LZ4::Writer < IO # Creates a new writer to the given *io*, yields it to the given block, # and closes it at the end. 
def self.open(io : IO, options : CompressOptions = CompressOptions.default, sync_close = false) - writer = new(io, preset: preset, sync_close: sync_close) + writer = new(io, options: options, sync_close: sync_close) yield writer ensure writer.close end @@ -66,76 +71,75 @@ class Compress::LZ4::Writer < IO yield writer ensure writer.close end - # Always raises `IO::Error` because this is a write-only `IO`. def read(slice : Bytes) raise IO::Error.new "Can't read from LZ4::Writer" end private def write_header return if @header_written - @buffer.to_unsafe.clear(@buffer.size) - header_size = LibLZ4.compress_begin(@context, @buffer.to_unsafe, @buffer.size, pointerof(@pref)) - raise LZ4Error.new("Failed to start compression: #{String.new(LibLZ4.get_error_name(header_size))}") unless LibLZ4.is_error(header_size) == 0 - @output.write(@buffer[...header_size]) if header_size > 0 + ret = LibLZ4.compress_begin(@context, @buffer, @buffer.size, nil) + raise_if_error(ret, "Failed to begin compression") + @output.write(@buffer[0, ret]) @header_written = true end - # See `IO#write`. def write(slice : Bytes) : Nil check_open - return 0i64 if slice.empty? write_header - while slice.size > 0 - write_size = slice.size - write_size = @buffer.size if write_size > @buffer.size - @buffer.to_unsafe.clear(@buffer.size) - - comp_size = LibLZ4.compress_update(@context, @buffer.to_unsafe, @buffer.size, slice.to_unsafe, write_size, nil) - raise LZ4Error.new("Compression failed: #{String.new(LibLZ4.get_error_name(comp_size))}") unless LibLZ4.is_error(comp_size) == 0 - @output.write(@buffer[...comp_size]) if comp_size > 0 - # 0 means data was buffered, to avoid buffer too small problem at end, - # let's flush the data manually - flush if comp_size == 0 - slice = slice[write_size..] + opts = LibLZ4::CompressOptionsT.new(stable_src: 1) + until slice.empty? 
+ read_size = Math.min(slice.size, @block_size) + ret = LibLZ4.compress_update(@context, @buffer, @buffer.size, slice, read_size, pointerof(opts)) + raise_if_error(ret, "Failed to compress") + @output.write(@buffer[0, ret]) + slice = slice + read_size end end - # See `IO#flush`. - def flush - return if @closed - @buffer.to_unsafe.clear(@buffer.size) - - ret = LibLZ4.flush(@context, @buffer.to_unsafe, @buffer.size, nil) - raise LZ4Error.new("Flush failed: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 - @output.write(@buffer[...ret]) if ret > 0 + def flush : Nil + check_open + ret = LibLZ4.flush(@context, @buffer, @buffer.size, nil) + raise_if_error(ret, "Failed to flush") + @output.write(@buffer[0, ret]) + @output.flush end - # Closes this writer. Must be invoked after all data has been written. + # Ends a LZ4 frame, the stream can still be written to, unless @sync_close def close - return if @closed || @context.nil? - - @buffer.to_unsafe.clear(@buffer.size) - comp_size = LibLZ4.compress_end(@context, @buffer.to_unsafe, @buffer.size, nil) - raise LZ4Error.new("Failed to end compression: #{String.new(LibLZ4.get_error_name(comp_size))}") unless LibLZ4.is_error(comp_size) == 0 - @output.write(@buffer[...comp_size]) if comp_size > 0 + check_open + ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, nil) + raise_if_error(ret, "Failed to end compression") + @output.write(@buffer[0, ret]) @header_written = false + ensure + if @sync_close + @closed = true # the stream can still be written to + @output.close + end + end + def finalize LibLZ4.free_compression_context(@context) - @closed = true - @output.close if @sync_close end - # Returns `true` if this IO is closed. - def closed? + def closed? : Bool @closed end + private def raise_if_error(ret : Int, msg : String) + unless LibLZ4.is_error(ret).zero? 
+ raise LZ4Error.new("#{msg}: #{String.new(LibLZ4.get_error_name(ret))}") + end + end + # :nodoc: def inspect(io : IO) : Nil to_s(io) end end +alias Compress::LZ4::BlockSize = Compress::LZ4::LibLZ4::BlockSizeIdT + struct Compress::LZ4::CompressOptions enum CompressionLevel FAST = 0 @@ -144,15 +148,14 @@ struct Compress::LZ4::CompressOptions OPT_MIN = 10 MAX = 12 end - # block size - property block_size : LibLZ4::BlockSizeIdT + property block_size : BlockSize property block_mode_linked : Bool property checksum : Bool property compression_level : CompressionLevel property auto_flush : Bool property favor_decompression_speed : Bool - def initialize(@block_size = LibLZ4::BlockSizeIdT::Max256Kb, @block_mode_linked = true, @checksum = false, + def initialize(@block_size = BlockSize::Default, @block_mode_linked = true, @checksum = false, @compression_level = CompressionLevel::FAST, @auto_flush = false, @favor_decompression_speed = false) end From 21610674cc8ccfdde729c8bad6ac9cec8809f74e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 03:47:10 +0200 Subject: [PATCH 05/31] fixup! refactor reader --- src/lz4/reader.cr | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 55905f0..6119617 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -1,10 +1,12 @@ +require "./lib" + # A read-only `IO` object to decompress data in the LZ4 frame format. # # Instances of this class wrap another IO object. When you read from this instance # instance, it reads data from the underlying IO, decompresses it, and returns # it to the caller. 
# ## Example: decompress an lz4 file -# ```crystal +# ``` # require "lz4" # string = File.open("file.lz4") do |file| From f9c259f8d6b7be77bf310c7449d07ad46688ddb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 04:08:23 +0200 Subject: [PATCH 06/31] more data in specs --- spec/lz4_spec.cr | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/spec/lz4_spec.cr b/spec/lz4_spec.cr index 6d9af08..0e4cf89 100644 --- a/spec/lz4_spec.cr +++ b/spec/lz4_spec.cr @@ -11,23 +11,21 @@ describe Compress::LZ4 do end it "can decompress" do - src_str = "foobar" * 100000 - input = IO::Memory.new(src_str) + bytes = Random::DEFAULT.random_bytes(10 * 1024**2) + input = IO::Memory.new(bytes) compressed = IO::Memory.new - Compress::LZ4::Writer.open(compressed) do |lz4| - cnt = IO.copy(input, lz4) - puts "wrote #{cnt} bytes" - end + writer = Compress::LZ4::Writer.new(compressed) + writer.write bytes + writer.close + compressed.rewind output = IO::Memory.new Compress::LZ4::Reader.open(compressed) do |lz4| cnt = IO.copy(lz4, output) - puts "read #{cnt} bytes" end - str = output.to_s - str.bytesize.should eq src_str.bytesize - str.should eq src_str + output.bytesize.should eq bytes.bytesize + output.to_slice.should eq bytes end it "can decompress small parts" do From 9a210187f22b3a51cbb4ea98c5753fe0516870ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 04:08:36 +0200 Subject: [PATCH 07/31] rm puts from reader --- src/lz4/reader.cr | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 6119617..ec427c7 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -59,7 +59,7 @@ class Compress::LZ4::Reader < IO raise IO::Error.new "Can't write to LZ4::Reader" end - def read(slice : Bytes) : Int + def read(slice : Bytes) : Int32 check_open return 0 if slice.empty? 
@@ -79,8 +79,7 @@ class Compress::LZ4::Reader < IO decompressed_bytes += dst_remaining break if slice.empty? # got all we needed break if dst_remaining.zero? # didn't progress - STDERR.puts "hint=#{ret}" - refill_buffer if ret > 0 # ret is a hint of how much more src data is needed + refill_buffer if ret > 0 # ret is a hint of how much more src data is needed end decompressed_bytes end @@ -106,7 +105,6 @@ class Compress::LZ4::Reader < IO private def refill_buffer cnt = @io.read(@buffer) - STDERR.puts "refilling buffer, got=#{cnt}" @chunk = @buffer[0, cnt] end From ad737f16c83daabdef425d83554cc6a5301e48d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 12:12:13 +0200 Subject: [PATCH 08/31] Writer#flush ends a lz4 frame LZ4.flush doesn't always flush everything; only ending the frame seems to do it. --- src/lz4/writer.cr | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index 1973109..b4d5022 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -96,24 +96,22 @@ class Compress::LZ4::Writer < IO end end + # Ends a LZ4 frame def flush : Nil check_open - ret = LibLZ4.flush(@context, @buffer, @buffer.size, nil) - raise_if_error(ret, "Failed to flush") + ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, nil) + raise_if_error(ret, "Failed to end compression") @output.write(@buffer[0, ret]) @output.flush + @header_written = false end - # Ends a LZ4 frame, the stream can still be written to, unless @sync_close + # Ends the current frame, the stream can still be written to, unless @sync_close def close - check_open - ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, nil) - raise_if_error(ret, "Failed to end compression") - @output.write(@buffer[0, ret]) - @header_written = false + flush ensure if @sync_close - @closed = true # the stream can still be written to + @closed = true # the stream can still be written until the underlaying io is closed 
@output.close end end From 1596fb93bdcc63f97c4888d7e958626f420f3342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 12:12:27 +0200 Subject: [PATCH 09/31] Provide compression stats --- src/lz4/reader.cr | 9 +++++++++ src/lz4/writer.cr | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index ec427c7..f87cd9b 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -20,6 +20,13 @@ class Compress::LZ4::Reader < IO property? sync_close : Bool getter? closed = false @context : LibLZ4::Dctx + getter compressed_bytes = 0u64 + getter uncompressed_bytes = 0u64 + + def compression_ratio : Float64 + return 0.0 if @compressed_bytes.zero? + @uncompressed_bytes / @compressed_bytes + end def initialize(@io : IO, @sync_close = false) ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) @@ -81,6 +88,7 @@ class Compress::LZ4::Reader < IO break if dst_remaining.zero? # didn't progress refill_buffer if ret > 0 # ret is a hint of how much more src data is needed end + @uncompressed_bytes &+= decompressed_bytes decompressed_bytes end @@ -105,6 +113,7 @@ class Compress::LZ4::Reader < IO private def refill_buffer cnt = @io.read(@buffer) + @compressed_bytes &+= cnt @chunk = @buffer[0, cnt] end diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index b4d5022..6e92dea 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -29,6 +29,13 @@ class Compress::LZ4::Writer < IO @context : LibLZ4::Cctx @pref : LibLZ4::PreferencesT @header_written = false + getter compressed_bytes = 0u64 + getter uncompressed_bytes = 0u64 + + def compression_ratio : Float64 + return 0.0 if @compressed_bytes.zero? 
+ @uncompressed_bytes / @compressed_bytes + end def initialize(@output : IO, options = CompressOptions.new, @sync_close = false) ret = LibLZ4.create_compression_context(out @context, LibLZ4::VERSION) @@ -79,6 +86,7 @@ class Compress::LZ4::Writer < IO return if @header_written ret = LibLZ4.compress_begin(@context, @buffer, @buffer.size, nil) raise_if_error(ret, "Failed to begin compression") + @compressed_bytes &+= ret @output.write(@buffer[0, ret]) @header_written = true end @@ -86,11 +94,13 @@ class Compress::LZ4::Writer < IO def write(slice : Bytes) : Nil check_open write_header + @uncompressed_bytes &+= slice.size opts = LibLZ4::CompressOptionsT.new(stable_src: 1) until slice.empty? read_size = Math.min(slice.size, @block_size) ret = LibLZ4.compress_update(@context, @buffer, @buffer.size, slice, read_size, pointerof(opts)) raise_if_error(ret, "Failed to compress") + @compressed_bytes &+= ret @output.write(@buffer[0, ret]) slice = slice + read_size end @@ -101,6 +111,7 @@ class Compress::LZ4::Writer < IO check_open ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, nil) raise_if_error(ret, "Failed to end compression") + @compressed_bytes &+= ret @output.write(@buffer[0, ret]) @output.flush @header_written = false From b75fbf80c79c9fd2eabb5505d52bfbc733e60d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 12:53:17 +0200 Subject: [PATCH 10/31] more specs --- spec/lz4_spec.cr | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/lz4_spec.cr b/spec/lz4_spec.cr index 0e4cf89..bd246bc 100644 --- a/spec/lz4_spec.cr +++ b/spec/lz4_spec.cr @@ -69,15 +69,17 @@ describe Compress::LZ4 do reader.close end - it "should raise if not fully decompressed on close" do - src = "a" * 1024**2 + it "can not read more than there is" do + src = "a" output = IO::Memory.new writer = Compress::LZ4::Writer.new(output) writer.write src.to_slice + writer.flush output.rewind reader = Compress::LZ4::Reader.new(output) dst = 
Bytes.new(1024) read_count = reader.read(dst) + read_count.should eq 1 reader.close end end From f5ff108d5618d4a08b12c3ff7791cc2dc96a6a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 12:53:32 +0200 Subject: [PATCH 11/31] bug fixes in reader --- src/lz4/reader.cr | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index f87cd9b..2cdf82f 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -32,7 +32,7 @@ class Compress::LZ4::Reader < IO ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) raise_if_error(ret, "Failed to create decompression context") @buffer = Bytes.new(DEFAULT_BUFFER_SIZE) - @chunk = Bytes.empty + @buffer_rem = Bytes.empty end # Creates a new reader from the given *io*, yields it to the given block, @@ -70,23 +70,23 @@ class Compress::LZ4::Reader < IO check_open return 0 if slice.empty? - refill_buffer if @chunk.empty? + refill_buffer opts = LibLZ4::DecompressOptionsT.new(stable_dst: 1) decompressed_bytes = 0 - loop do - src_remaining = @chunk.size.to_u64 + until @buffer_rem.empty? + src_remaining = @buffer_rem.size.to_u64 dst_remaining = slice.size.to_u64 - ret = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @chunk, pointerof(src_remaining), pointerof(opts)) + ret = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @buffer_rem, pointerof(src_remaining), pointerof(opts)) raise_if_error(ret, "Failed to decompress") - @chunk = @chunk + src_remaining - slice = slice + dst_remaining + @buffer_rem += src_remaining + slice += dst_remaining decompressed_bytes += dst_remaining - break if slice.empty? # got all we needed - break if dst_remaining.zero? # didn't progress - refill_buffer if ret > 0 # ret is a hint of how much more src data is needed + break if slice.empty? # got all we needed + break if ret.zero? 
# ret is a hint of how much more src data is needed + refill_buffer end @uncompressed_bytes &+= decompressed_bytes decompressed_bytes @@ -112,9 +112,10 @@ class Compress::LZ4::Reader < IO end private def refill_buffer + return unless @buffer_rem.empty? # never overwrite existing buffer cnt = @io.read(@buffer) @compressed_bytes &+= cnt - @chunk = @buffer[0, cnt] + @buffer_rem = @buffer[0, cnt] end private def raise_if_error(ret : Int, msg : String) From 66a5f3977c06d75367377595d64df16b92fc2a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 14:08:16 +0200 Subject: [PATCH 12/31] 64kb read buffer (default chunk size) --- src/lz4/reader.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 2cdf82f..4178c17 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -31,7 +31,7 @@ class Compress::LZ4::Reader < IO def initialize(@io : IO, @sync_close = false) ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) raise_if_error(ret, "Failed to create decompression context") - @buffer = Bytes.new(DEFAULT_BUFFER_SIZE) + @buffer = Bytes.new(64 * 1024) @buffer_rem = Bytes.empty end From 7b4345f25208a704eb499d6bb1991bd895985ab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 14:08:44 +0200 Subject: [PATCH 13/31] writer src is not stable --- src/lz4/reader.cr | 6 ++++-- src/lz4/writer.cr | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 4178c17..3eafa45 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -98,8 +98,10 @@ class Compress::LZ4::Reader < IO def close check_open - @closed = true - @io.close if @sync_close + if @sync_close + @closed = true # Only really closed if io is closed + @io.close + end end def finalize diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index 6e92dea..590f449 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -95,7 +95,7 
@@ class Compress::LZ4::Writer < IO check_open write_header @uncompressed_bytes &+= slice.size - opts = LibLZ4::CompressOptionsT.new(stable_src: 1) + opts = LibLZ4::CompressOptionsT.new(stable_src: 0) until slice.empty? read_size = Math.min(slice.size, @block_size) ret = LibLZ4.compress_update(@context, @buffer, @buffer.size, slice, read_size, pointerof(opts)) From 9ee6505b43f7ac26f89785bc710fbd52a9e5f6e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Fri, 30 Jun 2023 22:47:42 +0200 Subject: [PATCH 14/31] more bugfixes --- src/lz4/reader.cr | 17 ++++++++++------- src/lz4/writer.cr | 14 +++++++------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 3eafa45..c812884 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -31,7 +31,7 @@ class Compress::LZ4::Reader < IO def initialize(@io : IO, @sync_close = false) ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) raise_if_error(ret, "Failed to create decompression context") - @buffer = Bytes.new(64 * 1024) + @buffer = Bytes.new(32 * 1024) @buffer_rem = Bytes.empty end @@ -72,7 +72,7 @@ class Compress::LZ4::Reader < IO refill_buffer - opts = LibLZ4::DecompressOptionsT.new(stable_dst: 1) + opts = LibLZ4::DecompressOptionsT.new(stable_dst: 0) decompressed_bytes = 0 until @buffer_rem.empty? src_remaining = @buffer_rem.size.to_u64 @@ -86,7 +86,7 @@ class Compress::LZ4::Reader < IO decompressed_bytes += dst_remaining break if slice.empty? # got all we needed break if ret.zero? 
# ret is a hint of how much more src data is needed - refill_buffer + refill_buffer(ret) end @uncompressed_bytes &+= decompressed_bytes decompressed_bytes @@ -97,10 +97,9 @@ class Compress::LZ4::Reader < IO end def close - check_open if @sync_close - @closed = true # Only really closed if io is closed @io.close + @closed = true # Only really closed if io is closed end end @@ -113,9 +112,13 @@ class Compress::LZ4::Reader < IO LibLZ4.reset_decompression_context(@context) end - private def refill_buffer + private def refill_buffer(hint = nil) return unless @buffer_rem.empty? # never overwrite existing buffer - cnt = @io.read(@buffer) + if hint + cnt = @io.read(@buffer[0, Math.min(hint, @buffer.size)]) + else + cnt = @io.read(@buffer) + end @compressed_bytes &+= cnt @buffer_rem = @buffer[0, cnt] end diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index 590f449..ea10d39 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -41,6 +41,7 @@ class Compress::LZ4::Writer < IO ret = LibLZ4.create_compression_context(out @context, LibLZ4::VERSION) raise_if_error(ret, "Failed to create compression context") @pref = options.to_preferences + @opts = LibLZ4::CompressOptionsT.new(stable_src: 0) @block_size = case options.block_size in BlockSize::Default then 64 * 1024 in BlockSize::Max64Kb then 64 * 1024 @@ -53,27 +54,27 @@ class Compress::LZ4::Writer < IO end # Creates a new writer to the given *filename*. - def self.new(filename : String, options : CompressOptions = CompressOptions.default) + def self.new(filename : String, options = CompressOptions.nw) new(::File.new(filename, "w"), options: options, sync_close: true) end # Creates a new writer to the given *io*, yields it to the given block, # and closes it at the end. 
- def self.open(io : IO, options : CompressOptions = CompressOptions.default, sync_close = false) + def self.open(io : IO, options = CompressOptions.new, sync_close = false) writer = new(io, options: options, sync_close: sync_close) yield writer ensure writer.close end # Creates a new writer to the given *filename*, yields it to the given block, # and closes it at the end. - def self.open(filename : String, options : CompressOptions = CompressOptions.default) + def self.open(filename : String, options = CompressOptions.new) writer = new(filename, options: options) yield writer ensure writer.close end # Creates a new writer for the given *io*, yields it to the given block, # and closes it at its end. - def self.open(io : IO, options : CompressOptions = CompressOptions.default, sync_close : Bool = false) + def self.open(io : IO, options = CompressOptions.new, sync_close = false) writer = new(io, options: options, sync_close: sync_close) yield writer ensure writer.close end @@ -95,10 +96,9 @@ class Compress::LZ4::Writer < IO check_open write_header @uncompressed_bytes &+= slice.size - opts = LibLZ4::CompressOptionsT.new(stable_src: 0) until slice.empty? 
read_size = Math.min(slice.size, @block_size) - ret = LibLZ4.compress_update(@context, @buffer, @buffer.size, slice, read_size, pointerof(opts)) + ret = LibLZ4.compress_update(@context, @buffer, @buffer.size, slice, read_size, pointerof(@opts)) raise_if_error(ret, "Failed to compress") @compressed_bytes &+= ret @output.write(@buffer[0, ret]) @@ -109,7 +109,7 @@ class Compress::LZ4::Writer < IO # Ends a LZ4 frame def flush : Nil check_open - ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, nil) + ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, pointerof(@opts)) raise_if_error(ret, "Failed to end compression") @compressed_bytes &+= ret @output.write(@buffer[0, ret]) From 069d006132df11d975ccdf49f4c5da95b54243aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sat, 1 Jul 2023 23:01:57 +0200 Subject: [PATCH 15/31] 64kb buffer in reader --- src/lz4/reader.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index c812884..66ad2ad 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -31,7 +31,7 @@ class Compress::LZ4::Reader < IO def initialize(@io : IO, @sync_close = false) ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) raise_if_error(ret, "Failed to create decompression context") - @buffer = Bytes.new(32 * 1024) + @buffer = Bytes.new(64 * 1024) @buffer_rem = Bytes.empty end From 1ebac8c8311243745398d42d79e10e73b4dbf868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sat, 1 Jul 2023 23:05:33 +0200 Subject: [PATCH 16/31] data can be in internal buffers of lz4, so doesn't always need more src --- src/lz4/reader.cr | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 66ad2ad..687338f 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -70,23 +70,24 @@ class Compress::LZ4::Reader < IO check_open return 0 if slice.empty? 
- refill_buffer - opts = LibLZ4::DecompressOptionsT.new(stable_dst: 0) decompressed_bytes = 0 - until @buffer_rem.empty? + hint = 0u64 # the hint from the last decompression + loop do src_remaining = @buffer_rem.size.to_u64 + src_remaining = Math.min(hint, src_remaining) unless hint.zero? dst_remaining = slice.size.to_u64 - ret = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @buffer_rem, pointerof(src_remaining), pointerof(opts)) - raise_if_error(ret, "Failed to decompress") + hint = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @buffer_rem, pointerof(src_remaining), pointerof(opts)) + raise_if_error(hint, "Failed to decompress") @buffer_rem += src_remaining slice += dst_remaining decompressed_bytes += dst_remaining break if slice.empty? # got all we needed - break if ret.zero? # ret is a hint of how much more src data is needed - refill_buffer(ret) + break if hint.zero? # hint of how much more src data is needed + refill_buffer + break if @buffer_rem.empty? end @uncompressed_bytes &+= decompressed_bytes decompressed_bytes @@ -112,13 +113,9 @@ class Compress::LZ4::Reader < IO LibLZ4.reset_decompression_context(@context) end - private def refill_buffer(hint = nil) + private def refill_buffer return unless @buffer_rem.empty? 
# never overwrite existing buffer - if hint - cnt = @io.read(@buffer[0, Math.min(hint, @buffer.size)]) - else - cnt = @io.read(@buffer) - end + cnt = @io.read(@buffer) @compressed_bytes &+= cnt @buffer_rem = @buffer[0, cnt] end From 9fe53b979f30986b94d9ecc15dd0a54921e6f646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sat, 1 Jul 2023 23:06:48 +0200 Subject: [PATCH 17/31] Let flush be flush in writer --- src/lz4/writer.cr | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index ea10d39..e898769 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -109,17 +109,22 @@ class Compress::LZ4::Writer < IO # Ends a LZ4 frame def flush : Nil check_open - ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, pointerof(@opts)) - raise_if_error(ret, "Failed to end compression") + ret = LibLZ4.flush(@context, @buffer, @buffer.size, pointerof(@opts)) + raise_if_error(ret, "Failed to flush") @compressed_bytes &+= ret @output.write(@buffer[0, ret]) @output.flush - @header_written = false end # Ends the current frame, the stream can still be written to, unless @sync_close def close - flush + check_open + ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, pointerof(@opts)) + raise_if_error(ret, "Failed to end frame") + @compressed_bytes &+= ret + @output.write(@buffer[0, ret]) + @output.flush + @header_written = false ensure if @sync_close @closed = true # the stream can still be written until the underlaying io is closed From ecc70b649fa6c1202fad9fd97ed7aa0b25dbbf74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sat, 1 Jul 2023 23:07:02 +0200 Subject: [PATCH 18/31] auto flush by default to use less internal buffers --- src/lz4/writer.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index e898769..f1b05ee 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -170,7 +170,7 @@ struct 
Compress::LZ4::CompressOptions property favor_decompression_speed : Bool def initialize(@block_size = BlockSize::Default, @block_mode_linked = true, @checksum = false, - @compression_level = CompressionLevel::FAST, @auto_flush = false, + @compression_level = CompressionLevel::FAST, @auto_flush = true, @favor_decompression_speed = false) end From ca57be6f03d4bada360b6209be4b897237858097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sat, 1 Jul 2023 23:17:11 +0200 Subject: [PATCH 19/31] no need to zero-init structs in crystal --- src/lz4/writer.cr | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index f1b05ee..4d12b2a 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -192,8 +192,6 @@ struct Compress::LZ4::CompressOptions pref.auto_flush = auto_flush ? 1 : 0 pref.favor_dec_speed = favor_decompression_speed ? 1 : 0 - pref.reserved = StaticArray[0_u32, 0_u32, 0_u32] - pref end end From 60993755485d7f5308d6698fe0efecdbd2a92f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sun, 2 Jul 2023 00:00:48 +0200 Subject: [PATCH 20/31] use compress_bound for buffer check --- src/lz4/writer.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index 4d12b2a..41e8ddc 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -49,7 +49,7 @@ class Compress::LZ4::Writer < IO in BlockSize::Max1Mb then 1024 * 1024 in BlockSize::Max4Mb then 4 * 1024 * 1024 end - buffer_size = LibLZ4.compress_frame_bound(@block_size, pointerof(@pref)) + buffer_size = LibLZ4.compress_bound(@block_size, pointerof(@pref)) @buffer = Bytes.new(buffer_size) end From 425dceace8dec2327d7586a90f076addbd84c2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sun, 2 Jul 2023 00:01:05 +0200 Subject: [PATCH 21/31] pass preferences to compress_begin --- src/lz4/writer.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lz4/writer.cr 
b/src/lz4/writer.cr index 41e8ddc..a762d21 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -85,7 +85,7 @@ class Compress::LZ4::Writer < IO private def write_header return if @header_written - ret = LibLZ4.compress_begin(@context, @buffer, @buffer.size, nil) + ret = LibLZ4.compress_begin(@context, @buffer, @buffer.size, pointerof(@pref)) raise_if_error(ret, "Failed to begin compression") @compressed_bytes &+= ret @output.write(@buffer[0, ret]) @@ -102,7 +102,7 @@ class Compress::LZ4::Writer < IO raise_if_error(ret, "Failed to compress") @compressed_bytes &+= ret @output.write(@buffer[0, ret]) - slice = slice + read_size + slice += read_size end end From bc4ceb7426831f61866eebaca83ea23083086ca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Sun, 2 Jul 2023 00:25:14 +0200 Subject: [PATCH 22/31] auto_flush default false auto flush true destroys compression ratio for small writes --- src/lz4/writer.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index a762d21..00404fb 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -170,7 +170,7 @@ struct Compress::LZ4::CompressOptions property favor_decompression_speed : Bool def initialize(@block_size = BlockSize::Default, @block_mode_linked = true, @checksum = false, - @compression_level = CompressionLevel::FAST, @auto_flush = true, + @compression_level = CompressionLevel::FAST, @auto_flush = false, @favor_decompression_speed = false) end From e79d94cea312ca4910147d7aecd2cc20c9a1a9a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Mon, 3 Jul 2023 23:50:43 +0200 Subject: [PATCH 23/31] more specs --- spec/lz4_spec.cr | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/spec/lz4_spec.cr b/spec/lz4_spec.cr index bd246bc..d2ca354 100644 --- a/spec/lz4_spec.cr +++ b/spec/lz4_spec.cr @@ -1,6 +1,14 @@ require "./spec_helper" describe Compress::LZ4 do + it "can encode and decode" do + text = "foobar" * 
1000 + encoded = Compress::LZ4.encode(text) + encoded.size.should be < text.bytesize + decoded = Compress::LZ4.decode(encoded) + decoded.should eq text.to_slice + end + it "can compress" do input = IO::Memory.new("foobar" * 100000) output = IO::Memory.new @@ -82,4 +90,17 @@ describe Compress::LZ4 do read_count.should eq 1 reader.close end + + it "can compress and decompress small parts" do + rp, wp = IO.pipe + writer = Compress::LZ4::Writer.new(wp) + reader = Compress::LZ4::Reader.new(rp) + writer.print "foo" + writer.flush + reader.read_byte.should eq 'f'.ord + reader.read_byte.should eq 'o'.ord + reader.read_byte.should eq 'o'.ord + writer.close + reader.read_byte.should be_nil + end end From d4b08b4c962c96c6886e392b9826c2ca9777f932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Mon, 3 Jul 2023 23:50:52 +0200 Subject: [PATCH 24/31] more efficient decode method --- src/lz4.cr | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/lz4.cr b/src/lz4.cr index e7210f5..2732b6a 100644 --- a/src/lz4.cr +++ b/src/lz4.cr @@ -11,19 +11,20 @@ module Compress::LZ4 class LZ4Error < Exception end - def self.decode(compressed : Slice) - buf = IO::Memory.new(compressed) - uncompressed = Reader.open(buf) do |br| - br.gets_to_end + def self.decode(compressed : Bytes) : Bytes + input = IO::Memory.new(compressed) + output = IO::Memory.new + Reader.open(input) do |br| + IO.copy(br, output) end - uncompressed.to_slice + output.to_slice end def self.encode(content : String) encode(content.to_slice) end - def self.encode(content : Slice) + def self.encode(content : Bytes) buf = IO::Memory.new Writer.open(buf) do |br| br.write content @@ -33,4 +34,4 @@ module Compress::LZ4 end end -require "./**" +require "./lz4/*" From 45ba819b0cf311eb6f48050265f20b7e065c03ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Tue, 4 Jul 2023 22:00:03 +0200 Subject: [PATCH 25/31] Updated links and filenames in readme + added
contributor --- README.md | 8 ++++---- shard.yml | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4408b00..85538dc 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # Crystal LZ4 Compression -Crystal bindings to the [LZ4](https://lz4.github.io/lz4/) compression library. Bindings provided in this shard cover the [frame format](https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md) as the frame format is recommended one to use and guarantees interoperability with other implementations and language bindings. - +Crystal bindings to the [LZ4](https://lz4.github.io/lz4/) compression library. Bindings provided in this shard cover the [frame format](https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md) as the frame format is recommended one to use and guarantees interoperability with other implementations and language bindings. LZ4 is lossless compression algorithm, providing compression speed > 500 MB/s per core (>0.15 Bytes/cycle). It features an extremely fast decoder, with speed in multiple GB/s per core (~1 Byte/cycle). @@ -30,7 +29,7 @@ require "lz4" ```crystal require "lz4" -string = File.open("file.xz") do |file| +string = File.open("file.lz4") do |file| Compress::LZ4::Reader.open(file) do |lz4| lz4.gets_to_end end @@ -46,7 +45,7 @@ require "lz4" File.write("file.txt", "abcd") File.open("./file.txt", "r") do |input_file| - File.open("./file.xz", "w") do |output_file| + File.open("./file.lz4", "w") do |output_file| Compress::LZ4::Writer.open(output_file) do |lz4| IO.copy(input_file, lz4) end @@ -66,3 +65,4 @@ end ## Contributors - [Ali Naqvi](https://github.com/naqvis) - creator and maintainer +- [Carl Hörberg](https://github.com/carlhoerberg) - creator and maintainer diff --git a/shard.yml b/shard.yml index 5d15a1c..42b5d48 100644 --- a/shard.yml +++ b/shard.yml @@ -3,6 +3,8 @@ version: 0.1.4 authors: - Ali Naqvi + - Carl Hörberg + description: | Crystal bindings to the LZ4 compression library.
From f3e62c8547489b310cabd0d18c328f29f4a8d685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Tue, 4 Jul 2023 22:01:08 +0200 Subject: [PATCH 26/31] version bump to 1.0.0 --- shard.yml | 2 +- src/lz4.cr | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/shard.yml b/shard.yml index 42b5d48..925fec2 100644 --- a/shard.yml +++ b/shard.yml @@ -1,5 +1,5 @@ name: lz4 -version: 0.1.4 +version: 1.0.0 authors: - Ali Naqvi diff --git a/src/lz4.cr b/src/lz4.cr index 2732b6a..d2d87f0 100644 --- a/src/lz4.cr +++ b/src/lz4.cr @@ -2,7 +2,7 @@ require "semantic_version" module Compress::LZ4 - VERSION = "0.1.4" + VERSION = "1.0.0" LZ4_VERSION = SemanticVersion.parse String.new(LibLZ4.version_string) LZ4_VERSION_MINIMUM = SemanticVersion.parse("1.9.2") From 3a515e6443bf7d663c1cbdab68541659b7ad9472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Tue, 4 Jul 2023 22:02:51 +0200 Subject: [PATCH 27/31] readme contributors amendment --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 85538dc..8da3543 100644 --- a/README.md +++ b/README.md @@ -65,4 +65,4 @@ end ## Contributors - [Ali Naqvi](https://github.com/naqvis) - creator and maintainer -- [Carl Hörberg](https://github.com/carlhoerberg) - creator and maintainer +- [Carl Hörberg](https://github.com/carlhoerberg) From 9632b46715669de6568219f62e9e07cfd2b5809e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Tue, 4 Jul 2023 22:19:42 +0200 Subject: [PATCH 28/31] fix Reader#rewind --- src/lz4/reader.cr | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 687338f..cf6a096 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -110,6 +110,10 @@ class Compress::LZ4::Reader < IO def rewind @io.rewind + @buffer.clear + @buffer_rem = Bytes.empty + @uncompressed_bytes = 0u64 + @compressed_bytes = 0u64 LibLZ4.reset_decompression_context(@context) end From
13bb4061045a20245ab18689458dfb1eeed88a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Tue, 4 Jul 2023 22:20:10 +0200 Subject: [PATCH 29/31] refactoring --- src/lz4/reader.cr | 19 +++++++------------ src/lz4/writer.cr | 28 ++++++++++------------------ 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index cf6a096..145258b 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -19,14 +19,10 @@ require "./lib" class Compress::LZ4::Reader < IO property? sync_close : Bool getter? closed = false - @context : LibLZ4::Dctx getter compressed_bytes = 0u64 getter uncompressed_bytes = 0u64 - - def compression_ratio : Float64 - return 0.0 if @compressed_bytes.zero? - @uncompressed_bytes / @compressed_bytes - end + @context : LibLZ4::Dctx + @opts = LibLZ4::DecompressOptionsT.new(stable_dst: 0) def initialize(@io : IO, @sync_close = false) ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) @@ -69,8 +65,6 @@ class Compress::LZ4::Reader < IO def read(slice : Bytes) : Int32 check_open return 0 if slice.empty? - - opts = LibLZ4::DecompressOptionsT.new(stable_dst: 0) decompressed_bytes = 0 hint = 0u64 # the hint from the last decompression loop do @@ -78,7 +72,7 @@ class Compress::LZ4::Reader < IO src_remaining = Math.min(hint, src_remaining) unless hint.zero? dst_remaining = slice.size.to_u64 - hint = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @buffer_rem, pointerof(src_remaining), pointerof(opts)) + hint = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @buffer_rem, pointerof(src_remaining), pointerof(@opts)) raise_if_error(hint, "Failed to decompress") @buffer_rem += src_remaining @@ -130,8 +124,9 @@ class Compress::LZ4::Reader < IO end end - # :nodoc: - def inspect(io : IO) : Nil - to_s(io) + # Uncompressed bytes outputted / compressed bytes read so far in the stream + def compression_ratio : Float64 + return 0.0 if @compressed_bytes.zero? 
+ @uncompressed_bytes / @compressed_bytes end end diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index 00404fb..13b6d53 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -26,22 +26,17 @@ require "./lib" class Compress::LZ4::Writer < IO property? sync_close : Bool getter? closed = false + getter compressed_bytes = 0u64 + getter uncompressed_bytes = 0u64 @context : LibLZ4::Cctx @pref : LibLZ4::PreferencesT + @opts = LibLZ4::CompressOptionsT.new(stable_src: 0) @header_written = false - getter compressed_bytes = 0u64 - getter uncompressed_bytes = 0u64 - - def compression_ratio : Float64 - return 0.0 if @compressed_bytes.zero? - @uncompressed_bytes / @compressed_bytes - end def initialize(@output : IO, options = CompressOptions.new, @sync_close = false) ret = LibLZ4.create_compression_context(out @context, LibLZ4::VERSION) raise_if_error(ret, "Failed to create compression context") @pref = options.to_preferences - @opts = LibLZ4::CompressOptionsT.new(stable_src: 0) @block_size = case options.block_size in BlockSize::Default then 64 * 1024 in BlockSize::Max64Kb then 64 * 1024 @@ -54,7 +49,7 @@ class Compress::LZ4::Writer < IO end # Creates a new writer to the given *filename*. 
- def self.new(filename : String, options = CompressOptions.nw) + def self.new(filename : String, options = CompressOptions.new) new(::File.new(filename, "w"), options: options, sync_close: true) end @@ -106,7 +101,7 @@ class Compress::LZ4::Writer < IO end end - # Ends a LZ4 frame + # Flush LZ4 lib buffers even if a block isn't full def flush : Nil check_open ret = LibLZ4.flush(@context, @buffer, @buffer.size, pointerof(@opts)) @@ -116,7 +111,7 @@ class Compress::LZ4::Writer < IO @output.flush end - # Ends the current frame, the stream can still be written to, unless @sync_close + # Ends the current LZ4 frame, the stream can still be written to, unless @sync_close def close check_open ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, pointerof(@opts)) @@ -136,19 +131,16 @@ class Compress::LZ4::Writer < IO LibLZ4.free_compression_context(@context) end - def closed? : Bool - @closed - end - private def raise_if_error(ret : Int, msg : String) unless LibLZ4.is_error(ret).zero? raise LZ4Error.new("#{msg}: #{String.new(LibLZ4.get_error_name(ret))}") end end - # :nodoc: - def inspect(io : IO) : Nil - to_s(io) + # Uncompressed bytes read / compressed bytes outputted so far in the stream + def compression_ratio : Float64 + return 0.0 if @compressed_bytes.zero? 
+ @uncompressed_bytes / @compressed_bytes end end From 4da19df0d59cd0617ec124268134642fbd197723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Tue, 4 Jul 2023 22:43:20 +0200 Subject: [PATCH 30/31] fix reader rewind --- src/lz4/reader.cr | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index 145258b..9e7a2db 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -104,7 +104,6 @@ class Compress::LZ4::Reader < IO def rewind @io.rewind - @buffer.clear @buffer_rem = Bytes.empty @uncompressed_bytes = 0u64 @compressed_bytes = 0u64 From 070f5e5c9afd9314d744cdec37aad486f5ba985d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20H=C3=B6rberg?= Date: Tue, 4 Jul 2023 22:46:08 +0200 Subject: [PATCH 31/31] rewind spec --- spec/lz4_spec.cr | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/spec/lz4_spec.cr b/spec/lz4_spec.cr index d2ca354..599674c 100644 --- a/spec/lz4_spec.cr +++ b/spec/lz4_spec.cr @@ -103,4 +103,18 @@ describe Compress::LZ4 do writer.close reader.read_byte.should be_nil end + + it "can rewind a reader" do + input = IO::Memory.new("foobar" * 100000) + output = IO::Memory.new + Compress::LZ4::Writer.open(output) do |lz4| + IO.copy(input, lz4) + end + output.rewind + Compress::LZ4::Reader.open(output) do |lz4| + lz4.read_byte.should eq 'f'.ord + lz4.rewind + lz4.read_byte.should eq 'f'.ord + end + end end