diff --git a/CITATION.bib b/CITATION.bib
index f1a8184..3c52b81 100644
--- a/CITATION.bib
+++ b/CITATION.bib
@@ -2,7 +2,7 @@ @misc{InferOpt.jl
   author = {Guillaume Dalle, Léo Baty, Louis Bouvier and Axel Parmentier},
   title = {InferOpt.jl},
   url = {https://github.com/axelparmentier/InferOpt.jl},
-  version = {v0.2.0},
+  version = {v0.3.0},
   year = {2022},
-  month = {6}
+  month = {7}
 }
diff --git a/Project.toml b/Project.toml
index 6cd2fa0..3a07f07 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "InferOpt"
 uuid = "4846b161-c94e-4150-8dac-c7ae193c601f"
 authors = ["Guillaume Dalle", "Léo Baty", "Louis Bouvier", "Axel Parmentier"]
-version = "0.2.0"
+version = "0.3.0"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
index 16c3d64..e12da01 100644
--- a/docs/src/tutorial.md
+++ b/docs/src/tutorial.md
@@ -121,7 +121,7 @@ Thanks to this smoothing, we can now train our model with a standard gradient op
 
 ````@example tutorial
 encoder = deepcopy(initial_encoder)
-opt = ADAM();
+opt = Adam();
 losses = Float64[]
 for epoch in 1:200
     l = 0.0
diff --git a/src/InferOpt.jl b/src/InferOpt.jl
index 9243efe..0a65d63 100644
--- a/src/InferOpt.jl
+++ b/src/InferOpt.jl
@@ -16,7 +16,7 @@ using StatsBase: StatsBase, sample
 using Test
 
 include("utils/probability_distribution.jl")
-include("utils/composition.jl")
+include("utils/pushforward.jl")
 
 include("interpolation/interpolation.jl")
 
@@ -42,8 +42,9 @@ include("ssvm/isbaseloss.jl")
 include("ssvm/zeroone_baseloss.jl")
 include("ssvm/ssvm_loss.jl")
 
-export FixedAtomsProbabilityDistribution, sample, compute_expectation
-export ProbabilisticComposition
+export FixedAtomsProbabilityDistribution
+export compute_expectation, compress_distribution!
+export Pushforward
 export compute_probability_distribution
 
 export Interpolation
diff --git a/src/utils/probability_distribution.jl b/src/utils/probability_distribution.jl
index 48744cd..e1ec754 100644
--- a/src/utils/probability_distribution.jl
+++ b/src/utils/probability_distribution.jl
@@ -46,11 +46,13 @@ end
 Base.rand(probadist::FixedAtomsProbabilityDistribution) = rand(GLOBAL_RNG, probadist)
 
 """
-    compress!(probadist[; atol])
+    compress_distribution!(probadist[; atol])
 
 Remove duplicated atoms in `probadist` (up to a tolerance on equality).
 """
-function compress!(probadist::FixedAtomsProbabilityDistribution{A,W}; atol=0) where {A,W}
+function compress_distribution!(
+    probadist::FixedAtomsProbabilityDistribution{A,W}; atol=0
+) where {A,W}
     (; atoms, weights) = probadist
     to_delete = Int[]
     for i in length(probadist):-1:1
diff --git a/src/utils/composition.jl b/src/utils/pushforward.jl
similarity index 71%
rename from src/utils/composition.jl
rename to src/utils/pushforward.jl
index bc3b146..827eb57 100644
--- a/src/utils/composition.jl
+++ b/src/utils/pushforward.jl
@@ -1,9 +1,9 @@
 """
-    ProbabilisticComposition{L,G}
+    Pushforward{L,G}
 
 Differentiable composition of a probabilistic `layer` with an arbitrary function `post_processing`.
 
-`ProbabilisticComposition` can be used for direct regret minimization (aka learning by experience) when the post-processing returns a cost.
+`Pushforward` can be used for direct regret minimization (aka learning by experience) when the post-processing returns a cost.
 
 # Fields
 - `layer::L`: anything that implements `compute_probability_distribution(layer, θ; kwargs...)`
@@ -11,14 +11,14 @@ Differentiable composition of a probabilistic `layer` with an arbitrary function
 
 See also: [`FixedAtomsProbabilityDistribution`](@ref).
 """
-struct ProbabilisticComposition{L,P}
+struct Pushforward{L,P}
     layer::L
     post_processing::P
 end
 
-function Base.show(io::IO, composition::ProbabilisticComposition)
+function Base.show(io::IO, composition::Pushforward)
     (; layer, post_processing) = composition
-    return print(io, "ProbabilisticComposition($layer, $post_processing)")
+    return print(io, "Pushforward($layer, $post_processing)")
 end
 
 """
@@ -30,9 +30,7 @@ This function is not differentiable if `composition.post_processing` isn't.
 
 See also: [`apply_on_atoms`](@ref).
 """
-function compute_probability_distribution(
-    composition::ProbabilisticComposition, θ; kwargs...
-)
+function compute_probability_distribution(composition::Pushforward, θ; kwargs...)
     (; layer, post_processing) = composition
     probadist = compute_probability_distribution(layer, θ; kwargs...)
     post_processed_probadist = apply_on_atoms(post_processing, probadist; kwargs...)
@@ -40,7 +38,7 @@ end
 
 """
-    (composition::ProbabilisticComposition)(θ)
+    (composition::Pushforward)(θ)
 
 Output the expectation of `composition.post_processing(X)`, where `X` follows the distribution defined by `composition.layer` applied to `θ`.
 
@@ -48,7 +46,7 @@ Unlike [`compute_probability_distribution(composition, θ)`](@ref), this functio
 
 See also: [`compute_expectation`](@ref).
 """
-function (composition::ProbabilisticComposition)(θ::AbstractArray{<:Real}; kwargs...)
+function (composition::Pushforward)(θ::AbstractArray{<:Real}; kwargs...)
     (; layer, post_processing) = composition
     probadist = compute_probability_distribution(layer, θ; kwargs...)
     return compute_expectation(probadist, post_processing; kwargs...)
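
Since this file rename is the central API change of the release, here is a short usage sketch for reviewers (illustration only, not part of the patch). It assumes InferOpt ≥ 0.3; `toy_maximizer` and `toy_cost` are made-up stand-ins for a real combinatorial oracle and instance cost, while the other calls (`PerturbedAdditive`, `Pushforward`, `compute_probability_distribution`, `compress_distribution!`) are exactly the names touched by this diff.

```julia
# Sketch only: `toy_maximizer` and `toy_cost` are hypothetical placeholders.
using InferOpt

# One-hot argmax standing in for a combinatorial optimizer θ -> y(θ).
toy_maximizer(θ; kwargs...) = [i == argmax(θ) ? 1.0 : 0.0 for i in eachindex(θ)]
# Post-processing that maps a solution to a scalar cost.
toy_cost(y; kwargs...) = -sum(y)

layer = PerturbedAdditive(toy_maximizer; ε=1.0, nb_samples=10)
loss = Pushforward(layer, toy_cost)      # renamed from ProbabilisticComposition

θ = randn(5)
probadist = compute_probability_distribution(layer, θ)
compress_distribution!(probadist)        # renamed from compress!
loss(θ)                                  # expected cost over the perturbed solutions
```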
diff --git a/test/argmax.jl b/test/argmax.jl
index 0306405..fad4cc0 100644
--- a/test/argmax.jl
+++ b/test/argmax.jl
@@ -77,21 +77,19 @@ pipelines_experience = [
     (
         encoder=encoder_factory(),
         maximizer=identity,
-        loss=ProbabilisticComposition(
-            PerturbedAdditive(true_maximizer; ε=1.0, nb_samples=10), cost
-        ),
+        loss=Pushforward(PerturbedAdditive(true_maximizer; ε=1.0, nb_samples=10), cost),
     ),
     (
         encoder=encoder_factory(),
         maximizer=identity,
-        loss=ProbabilisticComposition(
+        loss=Pushforward(
             PerturbedMultiplicative(true_maximizer; ε=1.0, nb_samples=10), cost
         ),
     ),
     (
         encoder=encoder_factory(),
         maximizer=identity,
-        loss=ProbabilisticComposition(
+        loss=Pushforward(
             RegularizedGeneric(true_maximizer, half_square_norm, identity), cost
         ),
     ),
diff --git a/test/paths.jl b/test/paths.jl
index 014cac2..4e1552c 100644
--- a/test/paths.jl
+++ b/test/paths.jl
@@ -79,21 +79,19 @@ pipelines_experience = [
     (
         encoder=encoder_factory(),
         maximizer=identity,
-        loss=ProbabilisticComposition(
-            PerturbedAdditive(true_maximizer; ε=1.0, nb_samples=10), cost
-        ),
+        loss=Pushforward(PerturbedAdditive(true_maximizer; ε=1.0, nb_samples=10), cost),
     ),
     (
         encoder=encoder_factory(),
         maximizer=identity,
-        loss=ProbabilisticComposition(
+        loss=Pushforward(
             PerturbedMultiplicative(true_maximizer; ε=1.0, nb_samples=10), cost
         ),
     ),
     (
         encoder=encoder_factory(),
         maximizer=identity,
-        loss=ProbabilisticComposition(
+        loss=Pushforward(
             RegularizedGeneric(true_maximizer, half_square_norm, identity), cost
         ),
     ),
diff --git a/test/ranking.jl b/test/ranking.jl
index 45bf740..bfff13f 100644
--- a/test/ranking.jl
+++ b/test/ranking.jl
@@ -71,21 +71,19 @@ pipelines_experience = [
     (
         encoder=encoder_factory(),
         maximizer=identity,
-        loss=ProbabilisticComposition(
-            PerturbedAdditive(true_maximizer; ε=1.0, nb_samples=10), cost
-        ),
+        loss=Pushforward(PerturbedAdditive(true_maximizer; ε=1.0, nb_samples=10), cost),
    ),
    (
        encoder=encoder_factory(),
        maximizer=identity,
-        loss=ProbabilisticComposition(
+        loss=Pushforward(
            PerturbedMultiplicative(true_maximizer; ε=1.0, nb_samples=10), cost
        ),
    ),
    (
        encoder=encoder_factory(),
        maximizer=identity,
-        loss=ProbabilisticComposition(
+        loss=Pushforward(
            RegularizedGeneric(true_maximizer, half_square_norm, identity), cost
        ),
    ),
diff --git a/test/tutorial.jl b/test/tutorial.jl
index dc49bcb..65c1388 100644
--- a/test/tutorial.jl
+++ b/test/tutorial.jl
@@ -111,7 +111,7 @@ Thanks to this smoothing, we can now train our model with a standard gradient op
 =#
 
 encoder = deepcopy(initial_encoder)
-opt = ADAM();
+opt = Adam();
 losses = Float64[]
 for epoch in 1:200
     l = 0.0
diff --git a/test/utils/pipeline.jl b/test/utils/pipeline.jl
index ff98dba..0c567fb 100644
--- a/test/utils/pipeline.jl
+++ b/test/utils/pipeline.jl
@@ -37,7 +37,7 @@ function test_pipeline!(
     @info "Testing $setting_name" maximizer loss
 
     ## Optimization
-    opt = ADAM()
+    opt = Adam()
     perf_storage = init_perf()
     prog = Progress(epochs; enabled=verbose)
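
The `ADAM()` → `Adam()` substitutions in the docs and tests presumably track the optimiser renaming introduced in Flux 0.13, where the all-caps spellings were deprecated. A minimal, self-contained training-loop sketch with the new spelling, assuming Flux ≥ 0.13 (the `encoder`, `X`, and `Y` below are placeholders, not the tutorial's actual model and data):

```julia
# Sketch only: model and data are hypothetical placeholders.
using Flux

encoder = Dense(10, 1)                     # stand-in for the tutorial's `initial_encoder`
X = [randn(Float32, 10) for _ in 1:5]      # placeholder features
Y = [randn(Float32, 1) for _ in 1:5]       # placeholder targets

opt = Adam()                               # spelled `ADAM()` before Flux 0.13
par = Flux.params(encoder)
losses = Float64[]
for epoch in 1:10
    l = 0.0
    for (x, y) in zip(X, Y)
        grads = gradient(() -> sum(abs2, encoder(x) .- y), par)
        Flux.update!(opt, par, grads)
        l += sum(abs2, encoder(x) .- y)
    end
    push!(losses, l)
end
```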