From c1e8cc8440ec2a3a917429700e0a72da577f9aa0 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 18 Jul 2021 14:07:56 +0200 Subject: [PATCH] Support s3 paths (#103) --- .github/workflows/UnitTest.yml | 5 ++-- Project.toml | 3 +- src/Deserialization/deserialization.jl | 10 +++---- src/Loggers/LogEmbeddings.jl | 19 +++++++----- src/TBLogger.jl | 16 +++++----- src/event.jl | 4 +-- test/runtests.jl | 41 ++++++++++++++++++-------- 7 files changed, 59 insertions(+), 39 deletions(-) diff --git a/.github/workflows/UnitTest.yml b/.github/workflows/UnitTest.yml index 4b4290d3..f7796971 100644 --- a/.github/workflows/UnitTest.yml +++ b/.github/workflows/UnitTest.yml @@ -50,7 +50,8 @@ jobs: PackageSpec(name="Reexport", version="0.2"), PackageSpec(name="Plots", version="1.6"), ]) + # Remove Minio from the test project + write("Project.toml", read(`grep -v '^Minio =' Project.toml`, String)) + run(`sed -i -e 's/, "Minio"//' Project.toml`) shell: julia --project=. --startup=no --color=yes {0} - - uses: julia-actions/julia-runtest@latest - diff --git a/Project.toml b/Project.toml index 8f02ab7e..7ac4406b 100644 --- a/Project.toml +++ b/Project.toml @@ -20,6 +20,7 @@ StatsBase = "0.27, 0.28, 0.29, 0.30, 0.31, 0.32, 0.33" julia = "1.3" [extras] +Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20" ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -33,4 +34,4 @@ ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" WAV = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88" [targets] -test = ["Test", "MLDatasets", "TestImages", "ImageMagick", "Logging", "LightGraphs", "Plots", "PyPlot", "WAV", "Tracker", "ValueHistories"] +test = ["Test", "MLDatasets", "TestImages", "ImageMagick", "Logging", "LightGraphs", "Plots", "PyPlot", "WAV", "Tracker", "ValueHistories", "Minio"] diff --git a/src/Deserialization/deserialization.jl b/src/Deserialization/deserialization.jl index bd3295cc..146ae28a 100644 --- a/src/Deserialization/deserialization.jl +++ b/src/Deserialization/deserialization.jl @@ -1,13 +1,13 @@ export summary_iterator """ - is_valid_event(f::IOStream) => Bool + is_valid_event(f::IO) => Bool Returns true if the stream points to a valid TensorBoard event, false overwise. This is accomplished by checeking the crc checksum on the header (first 8 bytes) of the event. """ -function is_valid_event(f::IOStream) +function is_valid_event(f::IO) eof(f) && return false header = read(f, 8) @@ -23,13 +23,13 @@ end """ - read_event(f::IOStream) => Event + read_event(f::IO) => Event Reads the stream `f`, assuming it's encoded according to TensorBoard format, and decodes a single event. This function assumes that `eof(f) == false`. """ -function read_event(f::IOStream) +function read_event(f::IO) header = read(f, 8) crc_header = read(f, 4) @@ -113,7 +113,7 @@ Iterator for iterating along a fstream. The optional argument `stop_at_step` tells at what step the iterator should stop. """ struct TBEventFileIterator - fstream::IOStream + fstream::IO stop_at_step::Int end TBEventFileIterator(fstream) = TBEventFileIterator(fstream, typemax(Int)) diff --git a/src/Loggers/LogEmbeddings.jl b/src/Loggers/LogEmbeddings.jl index 1e73eb1a..10de426d 100644 --- a/src/Loggers/LogEmbeddings.jl +++ b/src/Loggers/LogEmbeddings.jl @@ -34,7 +34,7 @@ function log_embeddings(logger::TBLogger, name::AbstractString, mat::AbstractMat write_pbtext(name, logger.logdir, matrix_path, metadata, img_labels, step) end -function write_matrix(mat::AbstractMatrix, matrix_path::AbstractString) +function write_matrix(mat::AbstractMatrix, matrix_path) matrix_path = joinpath(matrix_path, "tensor.tsv") mat = convert(Array{Float64,2}, mat) open(matrix_path, "w") do file @@ -45,7 +45,7 @@ function write_matrix(mat::AbstractMatrix, matrix_path::AbstractString) end end -function write_metadata(metadata::AbstractArray, matrix_path::AbstractString) +function write_metadata(metadata::AbstractArray, matrix_path) matrix_path = joinpath(matrix_path, "metadata.tsv") open(matrix_path, "w") do file for x in metadata @@ -54,7 +54,7 @@ function write_metadata(metadata::AbstractArray, matrix_path::AbstractString) end end -function write_sprite(img_labels::AbstractArray, matrix_path::AbstractString) +function write_sprite(img_labels::AbstractArray, matrix_path) n, _, _, w = size(img_labels) sqrt(n)*w <= 8192 || throw(ErrorException("the value √N * W must be less than or equal to 8192 because of tensorboard restrictions")) total_pixels = size(img_labels, 1)*size(img_labels, 3)*size(img_labels, 4) @@ -65,7 +65,9 @@ function write_sprite(img_labels::AbstractArray, matrix_path::AbstractString) arranged_augment_square_CHW = zeros((3, sprite_size, sprite_size)) arranged_augment_square_CHW[:, 1:size(arranged_img_CHW, 2), :] = arranged_img_CHW sprite_path = joinpath(matrix_path, "sprite.png") - save(sprite_path, colorview(RGB, arranged_augment_square_CHW)) + open(sprite_path; write=true) do io + save(Stream{format"PNG"}(io), colorview(RGB, arranged_augment_square_CHW)) + end end function make_grid_of_images(img_labels::AbstractArray, ncols::Integer) @@ -89,21 +91,22 @@ function make_grid_of_images(img_labels::AbstractArray, ncols::Integer) grid end -function write_pbtext(name::AbstractString, path::AbstractString, matrix_path::AbstractString, metadata, img_labels, step) +function write_pbtext(name::AbstractString, path, matrix_path, metadata, img_labels, step) metadata_path = joinpath(matrix_path, "metadata.tsv") img_labels_path = joinpath(matrix_path, "sprite.png") matrix_path = joinpath(matrix_path, "tensor.tsv") path = joinpath(path, "projector_config.pbtxt") + isfile(path) || write(path, "") # workaround https://github.com/JuliaCloud/AWSS3.jl/issues/173 open(path, "a") do file write(file, "embeddings {\n") write(file, "tensor_name: \""*name*":"*repr(step)*"\"\n") - write(file, "tensor_path: \""*matrix_path*"\"\n") + write(file, string("tensor_path: \"", matrix_path, "\"\n")) if metadata != nothing - write(file, "metadata_path: \""*metadata_path*"\"\n") + write(file, string("metadata_path: \"", metadata_path, "\"\n")) end if img_labels != nothing write(file, "sprite {\n") - write(file, "image_path: \""*img_labels_path*"\"\n") + write(file, string("image_path: \"", img_labels_path, "\"\n")) write(file, "single_image_dim: "*string(size(img_labels, 4))*"\n") write(file, "single_image_dim: "*string(size(img_labels, 3))*"\n") write(file, "}\n") diff --git a/src/TBLogger.jl b/src/TBLogger.jl index 3debeae8..4cde028d 100644 --- a/src/TBLogger.jl +++ b/src/TBLogger.jl @@ -1,7 +1,7 @@ -mutable struct TBLogger <: AbstractLogger - logdir::String - file::IOStream - all_files::Dict{String, IOStream} +mutable struct TBLogger{P,S} <: AbstractLogger + logdir::P + file::S + all_files::Dict{String, S} global_step::Int step_increment::Int min_level::LogLevel @@ -52,7 +52,7 @@ function TBLogger(logdir="tensorboard_logs/run", overwrite=tb_increment; all_files = Dict(fpath => evfile) start_step = something(purge_step, 0) - TBLogger(logdir, evfile, all_files, start_step, step_increment, min_level) + TBLogger{typeof(logdir), typeof(evfile)}(logdir, evfile, all_files, start_step, step_increment, min_level) end """ @@ -89,7 +89,7 @@ function init_logdir(logdir, overwrite=tb_increment) end """ - create_eventfile(logdir, [purge_step=nothing; time=time()]) -> IOStream + create_eventfile(logdir, [purge_step=nothing; time=time()]) -> IO Creates a protobuffer events file in the logdir and returns the IO buffer for writing to it. If `purge_step::Int` is passed then a special event is written @@ -144,14 +144,14 @@ logdir(lg::TBLogger) = lg.logdir """ get_file(lg::TBLogger) -> IOS -Returns the main `file` IOStream object of Logger `lg`. +Returns the main `file` IO object of Logger `lg`. """ get_file(lg::TBLogger) = lg.file """ get_file(lg, tags::String...) -> IOS -Returns the `file` IOStream object of Logger `lg` writing to the tag +Returns the `file` IO object of Logger `lg` writing to the tag `tags1/tags2.../tagsN`. """ function get_file(lg::TBLogger, tags::String...) diff --git a/src/event.jl b/src/event.jl index d1df54d7..f9122f13 100644 --- a/src/event.jl +++ b/src/event.jl @@ -16,7 +16,7 @@ function make_event(logger::TBLogger, summary::GraphDef; step=TensorBoardLogger. end """ - write_event(out::IOStream, event::Event) + write_event(out::IO, event::Event) Serializes the Event `event` to the `out` stream according to the TensorBoard format. The format follows the following rule (in bytes) @@ -26,7 +26,7 @@ format. The format follows the following rule (in bytes) #3 16...N - serialized `event` as protobuffer #4 N..N+8 UInt32 masked_CRC of #3 """ -function write_event(out::IOStream, event::Event) +function write_event(out::IO, event::Event) data = PipeBuffer(); _writeproto(data, event) diff --git a/test/runtests.jl b/test/runtests.jl index 137c798b..e0de2715 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,20 +6,31 @@ using ImageCore using FileIO using LightGraphs -test_log_dir = "test_logs/" ENV["DATADEPS_ALWAYS_ACCEPT"] = true ENV["GKSwstype"] = "100" - ENV["DATADEPS_ALWAYS_ACCEPT"] = true -@testset "TensorBoardLogger" begin +LOG_DIRS = Any["test_logs/"] + +if VERSION >= v"1.5" + using Minio + # Setup Minio server to test s3 paths + minio_server = Minio.Server(mktempdir(); address="localhost:9001") + run(minio_server, wait=false) + config = MinioConfig("http://localhost:9001") + s3_create_bucket(config, "tensorboard-tests") + s3_log_dir = S3Path("s3://tensorboard-tests/logdir/"; config=config) + push!(LOG_DIRS, s3_log_dir) +end + +@testset "TensorBoardLogger with path $(test_log_dir)" for test_log_dir in LOG_DIRS @testset "TBLogger" begin include("test_TBLogger.jl") end @testset "Scalar Value Logger" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) step = 1 ss = TensorBoardLogger.scalar_summary("test", 12.0) @@ -49,7 +60,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true end @testset "Histogram Value Logger" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) step = 1 x0 = 0.5+step/30; s0 = 0.5/(step/20); @@ -96,7 +107,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true end @testset "Text Logger" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) step = 1 ss = TensorBoardLogger.text_summary("test", "Hello World") @@ -127,7 +138,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true end @testset "Image Logger" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) step = 1 # The following tests are akin to @test_nothrow, which does not exist. @@ -217,7 +228,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true end @testset "LogInterface" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) woman = testimage("woman_blonde") mri = testimage("mri") with_logger(logger) do @@ -238,7 +249,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true end @testset "Audio Logger" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) step = 1 ss = TensorBoardLogger.audio_summary("test", rand(800), 800) @@ -255,7 +266,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true end @testset "Graph Logger" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) step = 1 ss = TensorBoardLogger.graph_summary(DiGraph(1), ["1"], ["1"], ["cpu"], [nothing]) @test isa(ss, TensorBoardLogger.GraphDef) @@ -272,14 +283,14 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true end @testset "Embedding Logger" begin - logger = TBLogger(test_log_dir*"t", tb_overwrite) + logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite) step = 1 mat = rand(4, 4) metadata = rand(4, 10) metadata_header = Array(collect(1:10)) imgs = TBImages(rand(8, 8, 3, 4), HWCN) - @test π != log_embeddings(logger, "random1", mat, metadata = metadata, metadata_header = metadata_header, img_labels = imgs, step = step) - @test π != log_embeddings(logger, "random2", mat, step = step+1) + @test π != log_embeddings(logger, "random1/", mat, metadata = metadata, metadata_header = metadata_header, img_labels = imgs, step = step) + @test π != log_embeddings(logger, "random2/", mat, step = step+1) close.(values(logger.all_files)) end @@ -318,3 +329,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true rm(test_log_dir, force=true, recursive=true) end + +if VERSION >= v"1.5" + kill(minio_server) +end