diff --git a/README.md b/README.md index 4408b00..8da3543 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # Crystal LZ4 Compression -Crystal bindings to the [LZ4](https://lz4.github.io/lz4/) compression library. Bindings provided in this shard cover the [frame format](https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md) as the frame format is recommended one to use and guarantees interoperability with other implementations and language bindings. - +Crystal bindings to the [LZ4](https://lz4.github.io/lz4/) compression library. Bindings provided in this shard cover the [frame format](https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md) as the frame format is recommended one to use and guarantees interoperability with other implementations and language bindings. LZ4 is lossless compression algorithm, providing compression speed > 500 MB/s per core (>0.15 Bytes/cycle). It features an extremely fast decoder, with speed in multiple GB/s per core (~1 Byte/cycle). @@ -30,7 +29,7 @@ require "lz4" ```crystal require "lz4" -string = File.open("file.xz") do |file| +string = File.open("file.lz4") do |file| Compress::LZ4::Reader.open(file) do |lz4| lz4.gets_to_end end @@ -46,7 +45,7 @@ require "lz4" File.write("file.txt", "abcd") File.open("./file.txt", "r") do |input_file| - File.open("./file.xz", "w") do |output_file| + File.open("./file.lz4", "w") do |output_file| Compress::LZ4::Writer.open(output_file) do |lz4| IO.copy(input_file, lz4) end @@ -66,3 +65,4 @@ end ## Contributors - [Ali Naqvi](https://github.com/naqvis) - creator and maintainer +- [Carl Hörberg](https://github.com/carlhoerberg) diff --git a/shard.yml b/shard.yml index 5d15a1c..925fec2 100644 --- a/shard.yml +++ b/shard.yml @@ -1,8 +1,10 @@ name: lz4 -version: 0.1.4 +version: 1.0.0 authors: - Ali Naqvi + - Carl Hörberg + description: | Crystal bindings to the LZ4 compression library. diff --git a/spec/lz4_spec.cr b/spec/lz4_spec.cr index a64f174..599674c 100644 --- a/spec/lz4_spec.cr +++ b/spec/lz4_spec.cr @@ -1,4 +1,120 @@ require "./spec_helper" describe Compress::LZ4 do + it "can encode and decode" do + text = "foobar" * 1000 + encoded = Compress::LZ4.encode(text) + encoded.size.should be < text.bytesize + decoded = Compress::LZ4.decode(encoded) + decoded.should eq text.to_slice + end + + it "can compress" do + input = IO::Memory.new("foobar" * 100000) + output = IO::Memory.new + Compress::LZ4::Writer.open(output) do |lz4| + IO.copy(input, lz4) + end + output.bytesize.should be < input.bytesize + end + + it "can decompress" do + bytes = Random::DEFAULT.random_bytes(10 * 1024**2) + input = IO::Memory.new(bytes) + compressed = IO::Memory.new + writer = Compress::LZ4::Writer.new(compressed) + writer.write bytes + writer.close + + compressed.rewind + + output = IO::Memory.new + Compress::LZ4::Reader.open(compressed) do |lz4| + cnt = IO.copy(lz4, output) + end + output.bytesize.should eq bytes.bytesize + output.to_slice.should eq bytes + end + + it "can decompress small parts" do + input = IO::Memory.new("foobar" * 100000) + output = IO::Memory.new + Compress::LZ4::Writer.open(output) do |lz4| + IO.copy(input, lz4) + end + output.rewind + reader = Compress::LZ4::Reader.new(output) + reader.read_string(6).should eq "foobar" + reader.close + end + + it "can stream large amounts" do + src = "a" * 1024**2 + output = IO::Memory.new + writer = Compress::LZ4::Writer.new(output) + writer.write src.to_slice + output.rewind + reader = Compress::LZ4::Reader.new(output) + dst = Bytes.new(1024**2) + read_count = reader.read(dst) + read_count.should eq 1024**2 + reader.close + end + + it "can rewind" do + src = "a" * 1024**2 + output = IO::Memory.new + writer = Compress::LZ4::Writer.new(output) + writer.write src.to_slice + output.rewind + reader = Compress::LZ4::Reader.new(output) + dst = Bytes.new(1024**2) + read_count = reader.read(dst) + read_count.should eq 1024**2 + reader.rewind + read_count = reader.read(dst) + read_count.should eq 1024**2 + reader.close + end + + it "can not read more than there is" do + src = "a" + output = IO::Memory.new + writer = Compress::LZ4::Writer.new(output) + writer.write src.to_slice + writer.flush + output.rewind + reader = Compress::LZ4::Reader.new(output) + dst = Bytes.new(1024) + read_count = reader.read(dst) + read_count.should eq 1 + reader.close + end + + it "can compress and decompress small parts" do + rp, wp = IO.pipe + writer = Compress::LZ4::Writer.new(wp) + reader = Compress::LZ4::Reader.new(rp) + writer.print "foo" + writer.flush + reader.read_byte.should eq 'f'.ord + reader.read_byte.should eq 'o'.ord + reader.read_byte.should eq 'o'.ord + writer.close + reader.read_byte.should be_nil + end + + it "can rewind a reader" do + input = IO::Memory.new("foobar" * 100000) + output = IO::Memory.new + Compress::LZ4::Writer.open(output) do |lz4| + IO.copy(input, lz4) + end + output.rewind + Compress::LZ4::Reader.open(output) do |lz4| + lz4.read_byte.should eq 'f'.ord + lz4.rewind + lz4.read_byte.should eq 'f'.ord + end + end end diff --git a/src/lz4.cr b/src/lz4.cr index e7210f5..d2d87f0 100644 --- a/src/lz4.cr +++ b/src/lz4.cr @@ -2,7 +2,7 @@ require "semantic_version" module Compress::LZ4 - VERSION = "0.1.4" + VERSION = "1.0.0" LZ4_VERSION = SemanticVersion.parse String.new(LibLZ4.version_string) LZ4_VERSION_MINIMUM = SemanticVersion.parse("1.9.2") @@ -11,19 +11,20 @@ module Compress::LZ4 class LZ4Error < Exception end - def self.decode(compressed : Slice) - buf = IO::Memory.new(compressed) - uncompressed = Reader.open(buf) do |br| - br.gets_to_end + def self.decode(compressed : Bytes) : Bytes + input = IO::Memory.new(compressed) + output = IO::Memory.new + Reader.open(input) do |br| + IO.copy(br, output) end - uncompressed.to_slice + output.to_slice end def self.encode(content : String) encode(content.to_slice) end - def self.encode(content : Slice) + def self.encode(content : Bytes) buf = IO::Memory.new Writer.open(buf) do |br| br.write content @@ -33,4 +34,4 @@ module Compress::LZ4 end end -require "./**" +require "./lz4/*" diff --git a/src/lz4/lib.cr b/src/lz4/lib.cr index 84e4a6d..c5cdca3 100644 --- a/src/lz4/lib.cr +++ b/src/lz4/lib.cr @@ -1,5 +1,5 @@ module Compress::LZ4 - @[Link(ldflags: "`command -v pkg-config > /dev/null && pkg-config --libs liblz4 2> /dev/null|| printf %s '--llz4'`")] + @[Link("lz4")] lib LibLZ4 alias ErrorCodeT = LibC::SizeT alias Uint32T = LibC::UInt diff --git a/src/lz4/reader.cr b/src/lz4/reader.cr index e18bf36..9e7a2db 100644 --- a/src/lz4/reader.cr +++ b/src/lz4/reader.cr @@ -1,10 +1,12 @@ +require "./lib" + # A read-only `IO` object to decompress data in the LZ4 frame format. # # Instances of this class wrap another IO object. When you read from this instance # instance, it reads data from the underlying IO, decompresses it, and returns # it to the caller. # ## Example: decompress an lz4 file -# ```crystal +# ``` # require "lz4" # string = File.open("file.lz4") do |file| @@ -15,27 +17,18 @@ # pp string # ``` class Compress::LZ4::Reader < IO - include IO::Buffered - - # If `#sync_close?` is `true`, closing this IO will close the underlying IO. property? sync_close : Bool - - # Returns `true` if this reader is closed. getter? closed = false - + getter compressed_bytes = 0u64 + getter uncompressed_bytes = 0u64 @context : LibLZ4::Dctx + @opts = LibLZ4::DecompressOptionsT.new(stable_dst: 0) - # buffer size that avoids execessive round-trips between C and Crystal but doesn't waste too much - # memory on buffering. Its arbitrarily chosen. - BUF_SIZE = 64 * 1024 - - # Creates an instance of LZ4::Reader. - def initialize(@io : IO, @sync_close : Bool = false) - @buffer = Bytes.new(BUF_SIZE) - @chunk = Bytes.empty - + def initialize(@io : IO, @sync_close = false) ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION) - raise LZ4Error.new("Unable to create lz4 decoder instance: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 + raise_if_error(ret, "Failed to create decompression context") + @buffer = Bytes.new(64 * 1024) + @buffer_rem = Bytes.empty end # Creates a new reader from the given *io*, yields it to the given block, @@ -65,69 +58,74 @@ class Compress::LZ4::Reader < IO end # Always raises `IO::Error` because this is a read-only `IO`. - def unbuffered_write(slice : Bytes) + def write(slice : Bytes) : Nil raise IO::Error.new "Can't write to LZ4::Reader" end - def unbuffered_read(slice : Bytes) + def read(slice : Bytes) : Int32 check_open - return 0 if slice.empty? - - if @chunk.empty? - m = @io.read(@buffer) - return m if m == 0 - @chunk = @buffer[0, m] - end - + decompressed_bytes = 0 + hint = 0u64 # the hint from the last decompression loop do - in_remaining = @chunk.size.to_u64 - out_remaining = slice.size.to_u64 - - in_ptr = @chunk.to_unsafe - out_ptr = slice.to_unsafe - - ret = LibLZ4.decompress(@context, out_ptr, pointerof(out_remaining), in_ptr, pointerof(in_remaining), nil) - raise LZ4Error.new("lz4 decompression error: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 - - @chunk = @chunk[in_remaining..] - return out_remaining if ret == 0 - - if out_remaining == 0 - # Probably ran out of data and buffer needs a refill - enc_n = @io.read(@buffer) - return 0 if enc_n == 0 - @chunk = @buffer[0, enc_n] - next - end - - return out_remaining + src_remaining = @buffer_rem.size.to_u64 + src_remaining = Math.min(hint, src_remaining) unless hint.zero? + dst_remaining = slice.size.to_u64 + + hint = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @buffer_rem, pointerof(src_remaining), pointerof(@opts)) + raise_if_error(hint, "Failed to decompress") + + @buffer_rem += src_remaining + slice += dst_remaining + decompressed_bytes += dst_remaining + break if slice.empty? # got all we needed + break if hint.zero? # hint of how much more src data is needed + refill_buffer + break if @buffer_rem.empty? end - 0 + @uncompressed_bytes &+= decompressed_bytes + decompressed_bytes end - def unbuffered_flush + def flush raise IO::Error.new "Can't flush LZ4::Reader" end - # Closes this reader. - def unbuffered_close - return if @closed || @context.nil? - @closed = true + def close + if @sync_close + @io.close + @closed = true # Only really closed if io is closed + end + end + def finalize LibLZ4.free_decompression_context(@context) - @io.close if @sync_close end - def unbuffered_rewind - check_open - + def rewind @io.rewind - initialize(@io, @sync_close) + @buffer_rem = Bytes.empty + @uncompressed_bytes = 0u64 + @compressed_bytes = 0u64 + LibLZ4.reset_decompression_context(@context) + end + + private def refill_buffer + return unless @buffer_rem.empty? # never overwrite existing buffer + cnt = @io.read(@buffer) + @compressed_bytes &+= cnt + @buffer_rem = @buffer[0, cnt] + end + + private def raise_if_error(ret : Int, msg : String) + if LibLZ4.is_error(ret) != 0 + raise LZ4Error.new("#{msg}: #{String.new(LibLZ4.get_error_name(ret))}") + end end - # :nodoc: - def inspect(io : IO) : Nil - to_s(io) + # Uncompressed bytes outputted / compressed bytes read so far in the stream + def compression_ratio : Float64 + return 0.0 if @compressed_bytes.zero? + @uncompressed_bytes / @compressed_bytes end end diff --git a/src/lz4/writer.cr b/src/lz4/writer.cr index cecaa8c..13b6d53 100644 --- a/src/lz4/writer.cr +++ b/src/lz4/writer.cr @@ -1,3 +1,5 @@ +require "./lib" + # A write-only `IO` object to compress data in the LZ4 format. # # Instances of this class wrap another `IO` object. When you write to this @@ -22,120 +24,128 @@ # end # ``` class Compress::LZ4::Writer < IO - # If `#sync_close?` is `true`, closing this IO will close the underlying IO. property? sync_close : Bool + getter? closed = false + getter compressed_bytes = 0u64 + getter uncompressed_bytes = 0u64 @context : LibLZ4::Cctx - CHUNK_SIZE = 64 * 1024 @pref : LibLZ4::PreferencesT + @opts = LibLZ4::CompressOptionsT.new(stable_src: 0) + @header_written = false - def initialize(@output : IO, options : CompressOptions = WriterOptions.default, @sync_close : Bool = false) + def initialize(@output : IO, options = CompressOptions.new, @sync_close = false) ret = LibLZ4.create_compression_context(out @context, LibLZ4::VERSION) - raise LZ4Error.new("Unable to create lz4 encoder instance: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 - + raise_if_error(ret, "Failed to create compression context") @pref = options.to_preferences - buf_size = LibLZ4.compress_frame_bound(CHUNK_SIZE, pointerof(@pref)) - @buffer = Bytes.new(buf_size) - - @header_written = false - @closed = false + @block_size = case options.block_size + in BlockSize::Default then 64 * 1024 + in BlockSize::Max64Kb then 64 * 1024 + in BlockSize::Max256Kb then 256 * 1024 + in BlockSize::Max1Mb then 1024 * 1024 + in BlockSize::Max4Mb then 4 * 1024 * 1024 + end + buffer_size = LibLZ4.compress_bound(@block_size, pointerof(@pref)) + @buffer = Bytes.new(buffer_size) end # Creates a new writer to the given *filename*. - def self.new(filename : String, options : CompressOptions = CompressOptions.default) + def self.new(filename : String, options = CompressOptions.new) new(::File.new(filename, "w"), options: options, sync_close: true) end # Creates a new writer to the given *io*, yields it to the given block, # and closes it at the end. - def self.open(io : IO, options : CompressOptions = CompressOptions.default, sync_close = false) - writer = new(io, preset: preset, sync_close: sync_close) + def self.open(io : IO, options = CompressOptions.new, sync_close = false) + writer = new(io, options: options, sync_close: sync_close) yield writer ensure writer.close end # Creates a new writer to the given *filename*, yields it to the given block, # and closes it at the end. - def self.open(filename : String, options : CompressOptions = CompressOptions.default) + def self.open(filename : String, options = CompressOptions.new) writer = new(filename, options: options) yield writer ensure writer.close end # Creates a new writer for the given *io*, yields it to the given block, # and closes it at its end. - def self.open(io : IO, options : CompressOptions = CompressOptions.default, sync_close : Bool = false) + def self.open(io : IO, options = CompressOptions.new, sync_close = false) writer = new(io, options: options, sync_close: sync_close) yield writer ensure writer.close end - # Always raises `IO::Error` because this is a write-only `IO`. def read(slice : Bytes) raise IO::Error.new "Can't read from LZ4::Writer" end private def write_header return if @header_written - @buffer.to_unsafe.clear(@buffer.size) - header_size = LibLZ4.compress_begin(@context, @buffer.to_unsafe, @buffer.size, pointerof(@pref)) - raise LZ4Error.new("Failed to start compression: #{String.new(LibLZ4.get_error_name(header_size))}") unless LibLZ4.is_error(header_size) == 0 - @output.write(@buffer[...header_size]) if header_size > 0 + ret = LibLZ4.compress_begin(@context, @buffer, @buffer.size, pointerof(@pref)) + raise_if_error(ret, "Failed to begin compression") + @compressed_bytes &+= ret + @output.write(@buffer[0, ret]) @header_written = true end - # See `IO#write`. def write(slice : Bytes) : Nil check_open - return 0i64 if slice.empty? write_header - while slice.size > 0 - write_size = slice.size - write_size = @buffer.size if write_size > @buffer.size - @buffer.to_unsafe.clear(@buffer.size) - - comp_size = LibLZ4.compress_update(@context, @buffer.to_unsafe, @buffer.size, slice.to_unsafe, write_size, nil) - raise LZ4Error.new("Compression failed: #{String.new(LibLZ4.get_error_name(comp_size))}") unless LibLZ4.is_error(comp_size) == 0 - @output.write(@buffer[...comp_size]) if comp_size > 0 - # 0 means data was buffered, to avoid buffer too small problem at end, - # let's flush the data manually - flush if comp_size == 0 - slice = slice[write_size..] + @uncompressed_bytes &+= slice.size + until slice.empty? + read_size = Math.min(slice.size, @block_size) + ret = LibLZ4.compress_update(@context, @buffer, @buffer.size, slice, read_size, pointerof(@opts)) + raise_if_error(ret, "Failed to compress") + @compressed_bytes &+= ret + @output.write(@buffer[0, ret]) + slice += read_size end end - # See `IO#flush`. - def flush - return if @closed - @buffer.to_unsafe.clear(@buffer.size) - - ret = LibLZ4.flush(@context, @buffer.to_unsafe, @buffer.size, nil) - raise LZ4Error.new("Flush failed: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0 - @output.write(@buffer[...ret]) if ret > 0 + # Flush LZ4 lib buffers even if a block isn't full + def flush : Nil + check_open + ret = LibLZ4.flush(@context, @buffer, @buffer.size, pointerof(@opts)) + raise_if_error(ret, "Failed to flush") + @compressed_bytes &+= ret + @output.write(@buffer[0, ret]) + @output.flush end - # Closes this writer. Must be invoked after all data has been written. + # Ends the current LZ4 frame, the stream can still be written to, unless @sync_close def close - return if @closed || @context.nil? - - @buffer.to_unsafe.clear(@buffer.size) - comp_size = LibLZ4.compress_end(@context, @buffer.to_unsafe, @buffer.size, nil) - raise LZ4Error.new("Failed to end compression: #{String.new(LibLZ4.get_error_name(comp_size))}") unless LibLZ4.is_error(comp_size) == 0 - @output.write(@buffer[...comp_size]) if comp_size > 0 + check_open + ret = LibLZ4.compress_end(@context, @buffer, @buffer.size, pointerof(@opts)) + raise_if_error(ret, "Failed to end frame") + @compressed_bytes &+= ret + @output.write(@buffer[0, ret]) + @output.flush @header_written = false + ensure + if @sync_close + @closed = true # the stream can still be written until the underlaying io is closed + @output.close + end + end + def finalize LibLZ4.free_compression_context(@context) - @closed = true - @output.close if @sync_close end - # Returns `true` if this IO is closed. - def closed? - @closed + private def raise_if_error(ret : Int, msg : String) + unless LibLZ4.is_error(ret).zero? + raise LZ4Error.new("#{msg}: #{String.new(LibLZ4.get_error_name(ret))}") + end end - # :nodoc: - def inspect(io : IO) : Nil - to_s(io) + # Uncompressed bytes read / compressed bytes outputted so far in the stream + def compression_ratio : Float64 + return 0.0 if @compressed_bytes.zero? + @uncompressed_bytes / @compressed_bytes end end +alias Compress::LZ4::BlockSize = Compress::LZ4::LibLZ4::BlockSizeIdT + struct Compress::LZ4::CompressOptions enum CompressionLevel FAST = 0 @@ -144,15 +154,14 @@ struct Compress::LZ4::CompressOptions OPT_MIN = 10 MAX = 12 end - # block size - property block_size : LibLZ4::BlockSizeIdT + property block_size : BlockSize property block_mode_linked : Bool property checksum : Bool property compression_level : CompressionLevel property auto_flush : Bool property favor_decompression_speed : Bool - def initialize(@block_size = LibLZ4::BlockSizeIdT::Max256Kb, @block_mode_linked = true, @checksum = false, + def initialize(@block_size = BlockSize::Default, @block_mode_linked = true, @checksum = false, @compression_level = CompressionLevel::FAST, @auto_flush = false, @favor_decompression_speed = false) end @@ -175,8 +184,6 @@ struct Compress::LZ4::CompressOptions pref.auto_flush = auto_flush ? 1 : 0 pref.favor_dec_speed = favor_decompression_speed ? 1 : 0 - pref.reserved = StaticArray[0_u32, 0_u32, 0_u32] - pref end end