From 6139d12d25854c647ea1c8c7e4810d298bda3319 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 14:46:20 -0400 Subject: [PATCH 01/25] func{T}() -> func() where {T} --- src/bayes.jl | 10 +++++----- src/compat.jl | 8 ++++---- src/data.jl | 18 +++++++++--------- src/distributions.jl | 2 +- src/gmms.jl | 22 +++++++++++----------- src/gmmtypes.jl | 12 ++++++------ src/rand.jl | 2 +- src/recognizer.jl | 7 ++++--- src/stats.jl | 2 +- 9 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/bayes.jl b/src/bayes.jl index dc46cb9..8f891f5 100644 --- a/src/bayes.jl +++ b/src/bayes.jl @@ -15,7 +15,7 @@ using SpecialFunctions: digamma ## initialize a prior with minimal knowledge -function GMMprior{T<:AbstractFloat}(d::Int, alpha::T, beta::T) +function GMMprior(d::Int, alpha::T, beta::T) where {T<:AbstractFloat} m₀ = zeros(T, d) W₀ = eye(T, d) ν₀ = convert(T,d) @@ -24,7 +24,7 @@ end Base.copy(p::GMMprior) = GMMprior(p.α₀, p.β₀, copy(p.m₀), p.ν₀, copy(p.W₀)) ## initialize from a GMM and nₓ, the number of points used to train the GMM. -function VGMM{T}(g::GMM{T}, π::GMMprior{T}) +function VGMM(g::GMM{T}, π::GMMprior{T}) where {T} nₓ = g.nx N = g.w * nₓ mx = g.μ @@ -61,7 +61,7 @@ function GMM(vg::VGMM) end ## m-step given prior and stats -function mstep{T}(π::GMMprior, N::Vector{T}, mx::Matrix{T}, S::Vector) +function mstep(π::GMMprior, N::Vector{T}, mx::Matrix{T}, S::Vector) where {T} ng = length(N) α = π.α₀ + N # ng, 10.58 ν = π.ν₀ + N + 1 # ng, 10.63 @@ -147,7 +147,7 @@ end ## Like for the GMM, we return nₓ, (some value), zeroth, first, second order stats ## All return values can be accumulated, except r, which we need for ## lowerbound ElogpZπ and ElogqZ -function stats{T}(vg::VGMM, x::Matrix{T}, ex::Tuple) +function stats(vg::VGMM, x::Matrix{T}, ex::Tuple) where {T} ng = vg.n (nₓ, d) = size(x) if nₓ == 0 @@ -183,7 +183,7 @@ function stats(vg::VGMM, d::Data, ex::Tuple; parallel=false) end ## trace(A*B) = sum(A' .* B) -function trAB{T1,T2}(A::Matrix{T1}, B::Matrix{T2}) +function trAB(A::Matrix{T1}, B::Matrix{T2}) where {T1,T2} RT = promote_type(T1,T2) nr, nc = size(A) size(B) == (nc, nr) || error("Inconsistent matrix size") diff --git a/src/compat.jl b/src/compat.jl index a8582b1..d51e281 100644 --- a/src/compat.jl +++ b/src/compat.jl @@ -4,18 +4,18 @@ if VERSION < v"0.5.0-dev" end ## NumericExtensions is no longer supported, underoptimized implementation: -function logsumexp{T<:AbstractFloat}(x::AbstractVector{T}) +function logsumexp(x::AbstractVector{T}) where {T<:AbstractFloat} m = maximum(x) log(sum(exp.(x .- m))) + m end -logsumexp{T<:AbstractFloat}(x::Matrix{T}, dim::Integer) = mapslices(logsumexp, x, dim) +logsumexp(x::Matrix{T}, dim::Integer) where {T<:AbstractFloat} = mapslices(logsumexp, x, dim) ## Also NumericExtensions' semantics of dot() is no longer supported. 
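## An aside on the pattern this commit applies throughout: method type
## parameters move from `func{T}(...)` to `func(...) where {T}`. A minimal
## sketch with a hypothetical `scale` function (illustration only, not part
## of this package):
# scale{T<:AbstractFloat}(x::Vector{T}, a::T) = a .* x        ## 0.6 spelling
scale(x::Vector{T}, a::T) where {T<:AbstractFloat} = a .* x   ## 0.7+ spelling
scale([1.0, 2.0], 3.0)   ## -> [3.0, 6.0]; dispatch behaviour is unchanged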
-function Base.dot{T<:AbstractFloat}(x::Matrix{T}, y::Matrix{T}) +function LinearAlgebra.dot(x::Matrix{T}, y::Matrix{T}) where {T<:AbstractFloat} size(x) == size(y) || error("Matrix sizes must match") dot(vec(x), vec(y)) end -function Base.dot{T<:AbstractFloat}(x::Matrix{T}, y::Matrix{T}, dim::Integer) +function LinearAlgebra.dot(x::Matrix{T}, y::Matrix{T}, dim::Integer) where {T<:AbstractFloat} size(x) == size(y) || error("Matrix sizes must match") if dim==1 r = zeros(T, 1, size(x,2)) diff --git a/src/data.jl b/src/data.jl index 63bb648..fbefee7 100644 --- a/src/data.jl +++ b/src/data.jl @@ -1,16 +1,16 @@ ## data.jl Julia code to handle matrix-type data on disc ## report the kind of Data structure from the type instance -kind{T,S<:AbstractString}(d::Data{T,S}) = :file -kind{T}(d::Data{T,Matrix{T}}) = :matrix -Base.eltype{T}(d::Data{T}) = T +kind(d::Data{T,S}) where {T,S<:AbstractString} = :file +kind(d::Data{T,Matrix{T}}) where {T} = :matrix +Base.eltype(d::Data{T}) where {T} = T ## constructor for a plain matrix. rowvectors: data points x represented as rowvectors -Data{T}(x::Matrix{T}) = Data(Matrix{T}[x]) +Data(x::Matrix{T}) where {T} = Data(Matrix{T}[x]) ## constructor for a vector of files ## Data([strings], type, loadfunction) -function Data{S<:AbstractString}(files::Vector{S}, datatype::DataType, load::Function) +function Data(files::Vector{S}, datatype::DataType, load::Function) where {S<:AbstractString} Data(files, datatype, @compat Dict(:load => load)) end @@ -32,7 +32,7 @@ function JLD.save(file::AbstractString, x::Matrix) end ## Data([strings], type; load=loadfunction, size=sizefunction) -function Data{S<:AbstractString}(files::Vector{S}, datatype::DataType; kwargs...) +function Data(files::Vector{S}, datatype::DataType; kwargs...) where {S<:AbstractString} all([isa((k,v), (Symbol,Function)) for (k,v) in kwargs]) || error("Wrong type of argument", args) d = Dict{Symbol,Function}([kwargs...]) if !haskey(d, :load) @@ -60,7 +60,7 @@ function Base.getindex(x::Data, i::Int) end ## A range as index (including [1:1]) returns a Data object, not the data. -function Base.getindex{T,VT}(x::Data{T,VT}, r::Range) +function Base.getindex(x::Data{T,VT}, r::UnitRange{Int}) where {T,VT} Data{T, VT}(x.list[r], x.API) end @@ -136,7 +136,7 @@ function dmapreduce(f::Function, op::Function, x::Data) end ## stats: compute nth order stats for array (this belongs in stats.jl) -function stats{T<:AbstractFloat}(x::Matrix{T}, order::Int=2; kind=:diag, dim=1) +function stats(x::Matrix{T}, order::Int=2; kind=:diag, dim=1) where {T<:AbstractFloat} if dim==1 n, d = size(x) else @@ -278,7 +278,7 @@ end Base.collect(d::Data) = vcat([x for x in d]...) 
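## The same rewrite applies to conversion methods like the one in the hunk
## below; a self-contained sketch with a hypothetical `Point` type
## (illustration only, not part of this package):
struct Point{T<:Real}
    x::T
    y::T
end
Base.convert(::Type{Point{Td}}, p::Point{Ts}) where {Td,Ts} =
    Point{Td}(convert(Td, p.x), convert(Td, p.y))
convert(Point{Float64}, Point{Int}(1, 2))   ## -> Point{Float64}(1.0, 2.0)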
-function Base.convert{Td,Ts}(::Type{Data{Td}}, d::Data{Ts}) +function Base.convert(::Type{Data{Td}}, d::Data{Ts}) where {Td,Ts} Td == Ts && return d if kind(d) == :file api = copy(d.API) diff --git a/src/distributions.jl b/src/distributions.jl index f4ecfee..4e57e07 100644 --- a/src/distributions.jl +++ b/src/distributions.jl @@ -20,7 +20,7 @@ function GMM(m::MixtureModel{Multivariate,Continuous,DiagNormal}) end ## conversion to MixtureModel -function Distributions.MixtureModel{T<:AbstractFloat}(gmm::GMM{T}) +function Distributions.MixtureModel(gmm::GMM{T}) where {T<:AbstractFloat} if gmm.d == 1 mixtures = [Normal(gmm.μ[i,1], gmm.Σ[i,1]) for i=1:gmm.n] elseif kind(gmm) == :full diff --git a/src/gmms.jl b/src/gmms.jl index a3a5c4b..b052b54 100644 --- a/src/gmms.jl +++ b/src/gmms.jl @@ -21,14 +21,14 @@ function GMM(n::Int, d::Int; kind::Symbol=:diag) GMM(w, μ, Σ, hist, 0) end -Base.eltype{T}(gmm::GMM{T}) = T +Base.eltype(gmm::GMM{T}) where {T} = T ## switch between full covariance and inverse cholesky decomposition representations. """ `covar(GMM.Σ)` extracts the covariances Σ (which may be encoded as chol(inv(Σ)) """ -covar{T}(ci::AbstractTriangular{T}) = (c = inv(ci); c * c') -cholinv{T}(Σ::Matrix{T}) = chol(inv(cholfact(0.5(Σ+Σ')))) +covar(ci::AbstractTriangular{T}) where {T} = (c = inv(ci); c * c') +cholinv(Σ::Matrix{T}) where {T} = cholesky(inv(cholesky(0.5(Σ+Σ')))).U """ `kind(::GMM)` returns the kind of GMM, either `:diag` or `:full` @@ -48,8 +48,8 @@ weights(gmm::GMM) = gmm.w "`means(::GMM)` returns the means `μ` of the Gaussians in the mixture" means(gmm::GMM) = gmm.μ "`covars(::GMM)` returns the covariance matrices Σ of the Gaussians in the mixture." -covars{T}(gmm::GMM{T,DiagCov{T}}) = gmm.Σ -covars{T}(gmm::GMM{T,FullCov{T}}) = [covar(ci) for ci in gmm.Σ] +covars(gmm::GMM{T,DiagCov{T}}) where {T} = gmm.Σ +covars(gmm::GMM{T,FullCov{T}}) where {T} = [covar(ci) for ci in gmm.Σ] "`nparams(::GMM)` returns the number of free parameters in the GMM" function nparams(gmm::GMM) @@ -84,7 +84,7 @@ end ## create a full cov GMM from a diag cov GMM (for testing full covariance routines) "`full(::GMM)` turns a diagonal covariance GMM into a full-covariance GMMM" -function Base.full{T}(gmm::GMM{T}) +function full(gmm::GMM{T}) where {T} if kind(gmm) == :full return gmm end @@ -95,7 +95,7 @@ end """`diag(::GMM)` turns a full-covariance GMM into a diagonal-covariance GMM, by ignoring off-diagonal elements""" -function Base.diag{T}(gmm::GMM{T}) +function LinearAlgebra.diag(gmm::GMM{T}) where {T} if kind(gmm) == :diag return gmm end @@ -138,7 +138,7 @@ function compute_range(maxn, n) end ## we could improve this a lot -function Base.show{T}(io::IO, mime::MIME"text/plain", gmm::GMM{T}) +function Base.show(io::IO, mime::MIME"text/plain", gmm::GMM{T}) where {T} println(io, @sprintf("GMM{%s} with %d components in %d dimensions and %s covariance", T, gmm.n, gmm.d, kind(gmm))) gmmkind = kind(gmm) if gmmkind == :diag @@ -182,7 +182,7 @@ end ## some routines for conversion between float types # @doc """`convert(GMM{::Type}, GMM)` convert the GMM to a different floating point type""" -> -function Base.convert{Td,Ts}(::Type{GMM{Td}}, gmm::GMM{Ts}) +function Base.convert(::Type{GMM{Td}}, gmm::GMM{Ts}) where {Td,Ts} Ts == Td && return gmm h = vcat(gmm.hist, History(string("Converted to ", Td))) w = map(Td, gmm.w) @@ -197,7 +197,7 @@ function Base.convert{Td,Ts}(::Type{GMM{Td}}, gmm::GMM{Ts}) end GMM(w, μ, Σ, h, gmm.nx) end -function Base.convert{Td,Ts}(::Type{VGMM{Td}}, vg::VGMM{Ts}) +function 
Base.convert(::Type{VGMM{Td}}, vg::VGMM{Ts}) where {Td,Ts} Ts == Td && return vg h = vcat(vg.hist, History(string("Converted to ", Td))) W = map(eltype(FullCov{Td}), vg.W) @@ -205,7 +205,7 @@ function Base.convert{Td,Ts}(::Type{VGMM{Td}}, vg::VGMM{Ts}) VGMM(vg.n, vg.d, π, map(Td,vg.α), map(Td, vg.β), map(Td,vg.m), map(Td, vg.ν), W, h) end -function Base.convert{Td,Ts}(::Type{GMMprior{Td}}, p::GMMprior{Ts}) +function Base.convert(::Type{GMMprior{Td}}, p::GMMprior{Ts}) where {Td,Ts} Ts == Td && return p GMMprior(map(Td, p.α₀), map(Td, p.β₀), map(Td, p.m₀), map(Td, p.ν₀), map(Td, p.W₀)) end diff --git a/src/gmmtypes.jl b/src/gmmtypes.jl index d5318cf..9fc4b65 100644 --- a/src/gmmtypes.jl +++ b/src/gmmtypes.jl @@ -83,8 +83,8 @@ mutable struct GMM{T<:AbstractFloat, CT<:CovType{T}} <: GaussianMixture{T,CT} new(n, d, w, μ, Σ, hist, nx) end end -GMM{T<:AbstractFloat}(w::Vector{T}, μ::AbstractArray{T,2}, Σ::Union{DiagCov{T},FullCov{T}}, - hist::Vector, nx::Int) = GMM{T, typeof(Σ)}(w, μ, Σ, hist, nx) +GMM(w::Vector{T}, μ::AbstractArray{T,2}, Σ::Union{DiagCov{T},FullCov{T}}, + hist::Vector, nx::Int) where {T<:AbstractFloat} = GMM{T, typeof(Σ)}(w, μ, Σ, hist, nx) ## Variational Bayes GMM types. @@ -154,7 +154,7 @@ type CSstats{T<:AbstractFloat} new(n,f) end end -CSstats{T<:AbstractFloat}(n::Vector{T}, f::Matrix{T}) = CSstats{T}(n, f) +CSstats(n::Vector{T}, f::Matrix{T}) where {T<:AbstractFloat} = CSstats{T}(n, f) ## special case for tuple (why would I need this?) CSstats(t::Tuple) = CSstats(t[1], t[2]) @@ -179,7 +179,7 @@ type Cstats{T<:AbstractFloat, CT<:VecOrMat} new(n, f, s) end end -Cstats{T<:AbstractFloat}(n::Vector{T}, f::Matrix{T}, s::MatOrVecMat{T}) = Cstats{T,typeof(s)}(n, f, s) +Cstats(n::Vector{T}, f::Matrix{T}, s::MatOrVecMat{T}) where {T<:AbstractFloat} = Cstats{T,typeof(s)}(n, f, s) Cstats(t::Tuple) = Cstats(t...) ## A data handle, either in memory or on disk, perhaps even mmapped but I haven't seen any @@ -200,7 +200,7 @@ type Data{T,VT<:Union{Matrix,AbstractString}} return new(list,API) end end -Data{T}(list::Vector{Matrix{T}}) = Data{T, eltype(list)}(list, Dict{Symbol,Function}()) -Data{S<:AbstractString}(list::Vector{S}, t::DataType, API::Dict{Symbol,Function}) = Data{t, S}(list, API) +Data(list::Vector{Matrix{T}}) where {T} = Data{T, eltype(list)}(list, Dict{Symbol,Function}()) +Data(list::Vector{S}, t::DataType, API::Dict{Symbol,Function}) where {S<:AbstractString} = Data{t, S}(list, API) DataOrMatrix{T} = Union{Data{T}, Matrix{T}} diff --git a/src/rand.jl b/src/rand.jl index fd00a81..38a2501 100644 --- a/src/rand.jl +++ b/src/rand.jl @@ -23,7 +23,7 @@ function Base.rand(::Type{GMM}, ng::Int, d::Int; sep=2.0, kind=:full) end ## local helper -function binsearch{T}(x::T, a::Vector{T}) +function binsearch(x::T, a::Vector{T}) where {T} issorted(a) || error("Array needs to be sorted") mi = 1 ma = length(a) diff --git a/src/recognizer.jl b/src/recognizer.jl index 0d675d9..268e36a 100644 --- a/src/recognizer.jl +++ b/src/recognizer.jl @@ -8,18 +8,19 @@ function dotscore(x::CSstats, y::CSstats, r::Real=1.) sum(broadcast(/, x.f, x.n + r) .* y.f) end ## or directly from the UBM and the data x and y -dotscore{T<:Real}(gmm::GMM, x::Matrix{T}, y::Matrix{T}, r::Real=1.) = +dotscore(gmm::GMM, x::Matrix{T}, y::Matrix{T}, r::Real=1.) where {T<:Real} = dotscore(CSstats(gmm, x), CSstats(gmm, y), r) ## or produce a score matrix -function dotscore{T}(x::Vector{CSstats{T}}, y::Vector{CSstats{T}}, r::Real=1.) +function dotscore(x::Vector{CSstats{T}}, y::Vector{CSstats{T}}, r::Real=1.) 
where {T} xx = hcat([vec(x.f ./ (x.n+r)) for x in x]...) yy = hcat([vec(y.f) for y in y]...) return xx' * yy end ## Maximum A Posteriori adapt a gmm -function maxapost{T<:AbstractFloat}(gmm::GMM, x::Matrix{T}, r::Real=16.; means::Bool=true, weights::Bool=false, covars::Bool=false) +function maxapost(gmm::GMM, x::Matrix{T}, r::Real=16.; means::Bool=true, + weights::Bool=false, covars::Bool=false) where {T<:AbstractFloat} nₓ, ll, N, F, S = stats(gmm, x) α = N ./ (N+r) if weights diff --git a/src/stats.jl b/src/stats.jl index eb62701..287934e 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -212,7 +212,7 @@ Cstats(gmm::GMM, x::DataOrMatrix, parallel=false) = Cstats(cstats(gmm, x, parall CSstats(gmm::GMM, cstats::Cstats) = CSstats(cstats.N, cstats.F ./ gmm.Σ) ## some convenience functions -Base.eltype{T}(stats::Cstats{T}) = T +Base.eltype(stats::Cstats{T}) where {T} = T Base.size(stats::Cstats) = size(stats.F) kind(stats::Cstats) = typeof(stats.S) <: Vector ? :full : :diag Base.:+(a::Cstats, b::Cstats) = Cstats(a.N + b.N, a.F + b.F, a.S + b.S) From b7f5b1ff328868237c60d9777eb6c491d379b1ce Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 14:47:14 -0400 Subject: [PATCH 02/25] type -> struct --- src/gmmtypes.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gmmtypes.jl b/src/gmmtypes.jl index 9fc4b65..606c31b 100644 --- a/src/gmmtypes.jl +++ b/src/gmmtypes.jl @@ -21,7 +21,7 @@ """ `History`, a type to record the history of how a GMM is built. """ -type History +struct History """timestamp""" t::Float64 """description""" @@ -93,7 +93,7 @@ GMM(w::Vector{T}, μ::AbstractArray{T,2}, Σ::Union{DiagCov{T},FullCov{T}}, """ `GMMprior` is a type that holds the prior for training GMMs using Variational Bayes. """ -type GMMprior{T<:AbstractFloat} +struct GMMprior{T<:AbstractFloat} "effective prior number of observations" α₀::T β₀::T @@ -112,7 +112,7 @@ end """ `VGMM` is the type that is used to store a GMM in the Variational Bayes training. """ -type VGMM{T} <: GaussianMixture{T,Any} +mutable struct VGMM{T} <: GaussianMixture{T,Any} "number of Gaussians" n::Int "dimension of Gaussian" @@ -144,7 +144,7 @@ end """ `CSstats` a type holding centered and scaled zeroth and first order GMM statistics """ -type CSstats{T<:AbstractFloat} +struct CSstats{T<:AbstractFloat} "zeroth order stats" n::Vector{T} # zero-order stats, ng "first order stats" @@ -162,7 +162,7 @@ CSstats(t::Tuple) = CSstats(t[1], t[2]) """ `Cstats`, a type holding centered zeroth, first and second order GMM statistics """ -type Cstats{T<:AbstractFloat, CT<:VecOrMat} +struct Cstats{T<:AbstractFloat, CT<:VecOrMat} "zeroth order stats" N::Vector{T} "first order stats" @@ -193,7 +193,7 @@ Cstats(t::Tuple) = Cstats(t...) `Data` is a type for holding an array of feature vectors (i.e., matrices), or references to files on disk. The data is automatically loaded when needed, e.g., by indexing. 
""" -type Data{T,VT<:Union{Matrix,AbstractString}} +struct Data{T,VT<:Union{Matrix,AbstractString}} list::Vector{VT} API::Dict{Symbol,Function} function Data{T,VT}(list::Union{Vector{VT},Vector{Matrix{T}}}, API::Dict{Symbol,Function}) where{T,VT} From 3bc76c2ba7a0d51fe258ed13049c9b84c4da858d Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 14:52:36 -0400 Subject: [PATCH 03/25] Update Logging API use - info->@info - warn->@warn - Add using Logging --- src/gmms.jl | 3 ++- src/gmmtypes.jl | 2 ++ src/train.jl | 36 ++++++++++++++++++------------------ test/data.jl | 3 ++- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/gmms.jl b/src/gmms.jl index b052b54..b4078b2 100644 --- a/src/gmms.jl +++ b/src/gmms.jl @@ -2,6 +2,7 @@ ## (c) 2013--2014 David A. van Leeuwen import Base.LinAlg.AbstractTriangular +using Logging ## uninitialized constructor, defaults to Float64 """ @@ -67,7 +68,7 @@ end "`addhist!(::GMM, s)` adds a comment `s` to the GMMM" function addhist!(gmm::GaussianMixture, s::AbstractString) - info(s) + @info(s) push!(gmm.hist, History(s)) gmm end diff --git a/src/gmmtypes.jl b/src/gmmtypes.jl index 606c31b..e9f7c73 100644 --- a/src/gmmtypes.jl +++ b/src/gmmtypes.jl @@ -18,6 +18,8 @@ ## force Emacs utf-8: αβγδεζηθικλμνξοπρστυφχψω +using Printf + """ `History`, a type to record the history of how a GMM is built. """ diff --git a/src/train.jl b/src/train.jl index 3fc1487..1b848f2 100644 --- a/src/train.jl +++ b/src/train.jl @@ -2,7 +2,7 @@ ## (c) 2013--2014 David A. van Leeuwen using StatsBase: sample -import Logging +using Logging ## Greate a GMM with only one mixture and initialize it to ML parameters function GMM(x::DataOrMatrix{T}; kind=:diag) where T <: AbstractFloat @@ -63,7 +63,7 @@ function sanitycheck!(gmm::GMM) if (np=length(pathological)) > 0 mesg = string(np, " pathological elements normalized") addhist!(gmm, mesg) - warn(mesg) + @warn(mesg) end pathological end @@ -73,7 +73,7 @@ end function GMMk(n::Int, x::DataOrMatrix{T}; kind=:diag, nInit::Int=50, nIter::Int=10, sparse=0) where T nₓ, d = size(x) hist = [History(@sprintf("Initializing GMM, %d Gaussians %s covariance %d dimensions using %d data points", n, diag, d, nₓ))] - info(last(hist).s) + @info(last(hist).s) ## subsample x to max 1000 points per mean nneeded = 1000*n if nₓ < nneeded @@ -97,14 +97,14 @@ function GMMk(n::Int, x::DataOrMatrix{T}; kind=:diag, nInit::Int=50, nIter::Int= xx = vcat(yy...) end end - if Logging._root.level ≤ Logging.DEBUG + #if Logging._root.level ≤ Logging.DEBUG loglevel = :iter - elseif Logging._root.level ≤ Logging.INFO - loglevel = :final - else - loglevel = :none - end - km = Clustering.kmeans(xx', n, maxiter=nInit, display = loglevel) + #elseif Logging._root.level ≤ Logging.INFO + # loglevel = :final + #else + # loglevel = :none + #end + km = Clustering.kmeans(xx'[:,:], n, maxiter=nInit, display = loglevel) μ::Matrix{T} = km.centers' if kind == :diag ## helper that deals with centers with singleton datapoints. 
@@ -134,7 +134,7 @@ function GMMk(n::Int, x::DataOrMatrix{T}; kind=:diag, nInit::Int=50, nIter::Int= nxx = size(xx,1) ng = length(w) push!(hist, History(string("K-means with ", nxx, " data points using ", km.iterations, " iterations\n", @sprintf("%3.1f data points per parameter",nxx/((d+1)ng))))) - info(last(hist).s) + @info(last(hist).s) gmm = GMM(w, μ, Σ, hist, nxx) sanitycheck!(gmm) em!(gmm, x; nIter=nIter, sparse=sparse) @@ -149,14 +149,14 @@ function GMM2(n::Int, x::DataOrMatrix; kind=:diag, nIter::Int=10, nFinal::Int=nI 2^log2n == n || error("n must be power of 2") gmm = GMM(x, kind=kind) tll = [avll(gmm,x)] - info("0: avll = ", tll[1]) + @info("0: avll = ", tll[1]) for i=1:log2n gmm = gmmsplit(gmm) avll = em!(gmm, x; nIter=i==log2n ? nFinal : nIter, sparse=sparse) - info(i, ": avll = ", avll) + @info(i, ": avll = ", avll) append!(tll, avll) end - info("Total log likelihood: ", tll) + @info("Total log likelihood: ", tll) gmm end @@ -189,7 +189,7 @@ function gmmsplit(gmm::GMM{T}; minweight=1e-5, sep=0.2) where T maxi = reverse(sortperm(gmm.w)) offInd = find(gmm.w .< minweight) if (length(offInd)>0) - info("Removing Gaussians with no data"); + @info("Removing Gaussians with no data"); end for i=1:length(offInd) gmm.w[maxi[i]] = gmm.w[offInd[i]] = gmm.w[maxi[i]]/2; @@ -238,7 +238,7 @@ function em!(gmm::GMM, x::DataOrMatrix; nIter::Int = 10, varfloor::Float64=1e-3, initc = gmm.Σ ll = zeros(nIter) gmmkind = kind(gmm) - info(string("Running ", nIter, " iterations EM on ", gmmkind, " cov GMM with ", ng, " Gaussians in ", d, " dimensions")) + @info(string("Running ", nIter, " iterations EM on ", gmmkind, " cov GMM with ", ng, " Gaussians in ", d, " dimensions")) for i=1:nIter ## E-step nₓ, ll[i], N, F, S = stats(gmm, x, parallel=true) @@ -251,13 +251,13 @@ function em!(gmm::GMM, x::DataOrMatrix; nIter::Int = 10, varfloor::Float64=1e-3, tooSmall = any(gmm.Σ .< varfloor, 2) if (any(tooSmall)) ind = find(tooSmall) - warn("Variances had to be floored ", join(ind, " ")) + @warn("Variances had to be floored ", join(ind, " ")) gmm.Σ[ind,:] = initc[ind,:] end elseif gmmkind == :full for k=1:ng if N[k] < d - warn(@sprintf("Too low occupancy count %3.1f for Gausian %d", N[k], k)) + @warn(@sprintf("Too low occupancy count %3.1f for Gausian %d", N[k], k)) else μk = vec(gmm.μ[k,:]) ## v0.5 arraymageddon gmm.Σ[k] = cholinv(S[k] / N[k] - μk * μk') diff --git a/test/data.jl b/test/data.jl index 0f785aa..711025c 100644 --- a/test/data.jl +++ b/test/data.jl @@ -1,7 +1,8 @@ ## data.jl Test some functionality of the Data type ## (c) 2015 David A. 
van Leeuwen +using Logging -info("Testing Data") +@info("Testing Data") using JLD for i = 1:10 From 755fee5896324c8d1b681f9d479fbf4ece1aa8a7 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 14:54:51 -0400 Subject: [PATCH 04/25] Update Statistics API use --- REQUIRE | 1 + src/data.jl | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/REQUIRE b/REQUIRE index 7da9dec..74fab79 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,6 +1,7 @@ julia 0.6 Clustering 0.6.0 Distributions +StatsBase PDMats Compat JLD diff --git a/src/data.jl b/src/data.jl index fbefee7..af14c19 100644 --- a/src/data.jl +++ b/src/data.jl @@ -1,4 +1,5 @@ ## data.jl Julia code to handle matrix-type data on disc +using Statistics ## report the kind of Data structure from the type instance kind(d::Data{T,S}) where {T,S<:AbstractString} = :file @@ -218,17 +219,17 @@ end Base.sum(d::Data, dim::Int) = retranspose(stats(d,1, dim=dim)[2], dim) -function Base.mean(d::Data) +function Statistics.mean(d::Data) n, sx = stats(d, 1) sum(sx) / (n*length(sx)) end - function Base.mean(d::Data, dim::Int) + function Statistics.mean(d::Data, dim::Int) n, sx = stats(d, 1, dim=dim) return retranspose(sx ./ n, dim) end -function Base.var(d::Data) +function Statistics.var(d::Data) n, sx, sxx = stats(d, 2) n *= length(sx) ssx = sum(sx) # keep type stability... @@ -237,13 +238,13 @@ function Base.var(d::Data) return (ssxx - n*μ^2) / (n - 1) end -function Base.var(d::Data, dim::Int) +function Statistics.var(d::Data, dim::Int) n, sx, sxx = stats(d, 2, dim=dim) μ = sx ./ n return retranspose((sxx - n*μ.^2) ./ (n-1), dim) end -function Base.cov(d::Data) +function Statistics.cov(d::Data) n, sx, sxx = stats(d, 2, kind=:full) μ = sx ./ n (sxx - n*μ*μ') ./ (n-1) From 8bb99b030729a8cbb8aadcd0ef70485e831aa65e Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 14:57:19 -0400 Subject: [PATCH 05/25] Update Array{}() undef initializers --- src/bayes.jl | 2 +- src/data.jl | 2 +- src/rand.jl | 4 ++-- src/stats.jl | 2 +- src/train.jl | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/bayes.jl b/src/bayes.jl index 8f891f5..a2e7091 100644 --- a/src/bayes.jl +++ b/src/bayes.jl @@ -67,7 +67,7 @@ function mstep(π::GMMprior, N::Vector{T}, mx::Matrix{T}, S::Vector) where {T} ν = π.ν₀ + N + 1 # ng, 10.63 β = π.β₀ + N # ng, 10.60 m = similar(mx) # ng × d - W = Array{eltype(FullCov{T})}(ng) # ng × (d*d) + W = Array{eltype(FullCov{T})}(undef, ng) # ng × (d*d) d = size(mx,2) limit = √ eps(eltype(N)) W₀⁻¹ = inv(π.W₀) diff --git a/src/data.jl b/src/data.jl index af14c19..3023776 100644 --- a/src/data.jl +++ b/src/data.jl @@ -106,7 +106,7 @@ end function dmapreduce(f::Function, op::Function, x::Data) nₓ = length(x) nw = nworkers() - results = Array{Any}(nw) ## will contain pointers for parallel return value. + results = Array{Any}(undef, nw) ## will contain pointers for parallel return value. valid = Any[false for i=1:nw] # can't use bitarray as parallel return value, must be pointers id=0 nextid() = (id += 1) diff --git a/src/rand.jl b/src/rand.jl index 38a2501..6f4fe88 100644 --- a/src/rand.jl +++ b/src/rand.jl @@ -8,7 +8,7 @@ function Base.rand(::Type{GMM}, ng::Int, d::Int; sep=2.0, kind=:full) if kind==:diag Σ = hcat([rand(Chisq(1.0), ng) for i=1:d]...) 
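## Aside: in 0.7+ uninitialized arrays take an explicit `undef`, as in the
## surrounding hunk. The general idiom (illustration only):
##     v = Vector{Float64}(undef, 3)      ## three uninitialized Float64s
##     M = Matrix{Float64}(undef, 2, 2)   ## two-dimensional equivalent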
elseif kind == :full - Σ = Array{eltype(FullCov{Float64})}(ng) + Σ = Array{eltype(FullCov{Float64})}(undef, ng) for i=1:ng T = randn(d,d) Σ[i] = cholinv(T' * T / d) @@ -47,7 +47,7 @@ forcesymmetric(c::Matrix) = full(Symmetric(c)) ## This function samples n data points from a GMM. This is pretty slow, probably due to the array assignments. function Base.rand(gmm::GMM, n::Int) - x = Array{Float64}(n, gmm.d) + x = Array{Float64}(undef, n, gmm.d) ## generate indices distriuted according to weights index = mapslices(find, rand(Multinomial(1, gmm.w), n), 1) gmmkind = kind(gmm) diff --git a/src/stats.jl b/src/stats.jl index 287934e..f10d81d 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -93,7 +93,7 @@ function stats(gmm::GMM{GT,FCT}, x::Array{T,2}, order::Int) where FCT <: FullCov end ## S_k = Σ_i γ _ik x_i' * x S = Matrix{RT}[] - γx = Array{RT}(nₓ, d) + γx = Array{RT}(undef, nₓ, d) @inbounds for k=1:ng #broadcast!(*, γx, γ[:,k], x) # nₓ × d mults for j = 1:d for i=1:nₓ diff --git a/src/train.jl b/src/train.jl index 1b848f2..809cc33 100644 --- a/src/train.jl +++ b/src/train.jl @@ -326,8 +326,8 @@ function llpg(gmm::GMM{GT,FCT}, x::Matrix{T}) where FCT <: FullCov{GT} where {GT (nₓ, d) = size(x) ng = gmm.n d==gmm.d || error("Inconsistent size gmm and x") - ll = Array{RT}(nₓ, ng) - Δ = Array{RT}(nₓ, d) + ll = Array{RT}(undef, nₓ, ng) + Δ = Array{RT}(undef, nₓ, d) ## Σ's now are inverse choleski's, so logdet becomes -2sum(log(diag)) normalization = [0.5d*log(2π) - sum(log.(diag((gmm.Σ[k])))) for k=1:ng] for k=1:ng From 9aba424b5cf2feabda26d1ea1203346dc94e9507 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 14:58:03 -0400 Subject: [PATCH 06/25] srand() -> Random.seed!() --- test/scikitlearn.jl | 3 ++- test/testbayes.jl | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/scikitlearn.jl b/test/scikitlearn.jl index c3a5e39..da189f2 100644 --- a/test/scikitlearn.jl +++ b/test/scikitlearn.jl @@ -1,4 +1,5 @@ using ScikitLearnBase +using Random ## 1. 
Generate synthetic data from two distinct Gaussians: n_samples_A and ## n_samples_B data points @@ -10,7 +11,7 @@ n_samples_A = 300 n_samples_B = 600 # generate spherical data centered on (20, 20) -srand(42) +Random.seed!(42) shifted_gaussian = randn(n_samples_A, 2) .+ [20, 20]' # generate twice as many points from zero centered stretched Gaussian data diff --git a/test/testbayes.jl b/test/testbayes.jl index 48b11ec..a079c4c 100644 --- a/test/testbayes.jl +++ b/test/testbayes.jl @@ -1,5 +1,6 @@ +using Random reload("nomodule.jl") -srand(1) +Random.seed!(1) xx = readdlm("test/faithful.txt") gg = GMM(8, xx, nIter=0, kind=:full) pp = GMMprior(gg.d, 0.1, 1.0) From 298efe9c5140fce78554da03db2ef63e3f05458a Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 14:58:58 -0400 Subject: [PATCH 07/25] test/bayes.jl: Add using DelimitedFiles --- test/bayes.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/bayes.jl b/test/bayes.jl index 20371b3..d9d8f9d 100644 --- a/test/bayes.jl +++ b/test/bayes.jl @@ -1,3 +1,4 @@ +using DelimitedFiles ## test VB GMM for the standard example data x = readdlm("faithful.txt") ## only do k-means From 1efe9cb87b56ed97eb2d9386aa0f835d528ec98f Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:01:15 -0400 Subject: [PATCH 08/25] 1./x -> 1 ./ x for 0.7+ compat --- src/gmms.jl | 4 ++-- src/stats.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gmms.jl b/src/gmms.jl index b4078b2..9d1fdb3 100644 --- a/src/gmms.jl +++ b/src/gmms.jl @@ -89,7 +89,7 @@ function full(gmm::GMM{T}) where {T} if kind(gmm) == :full return gmm end - Σ = convert(FullCov{T}, [UpperTriangular(diagm(vec(1./√gmm.Σ[i,:]))) for i=1:gmm.n]) + Σ = convert(FullCov{T}, [UpperTriangular(diagm(vec(1 ./√gmm.Σ[i,:]))) for i=1:gmm.n]) new = GMM(copy(gmm.w), copy(gmm.μ), Σ, copy(gmm.hist), gmm.nx) addhist!(new, "Converted to full covariance") end @@ -102,7 +102,7 @@ function LinearAlgebra.diag(gmm::GMM{T}) where {T} end Σ = Array{T}(gmm.n, gmm.d) for i=1:gmm.n - Σ[i,:] = 1./abs2(diag(gmm.Σ[i])) + Σ[i,:] = 1 ./ abs2(diag(gmm.Σ[i])) end new = GMM(copy(gmm.w), copy(gmm.μ), Σ, copy(gmm.hist), gmm.nx) addhist!(new, "Converted to diag covariance") diff --git a/src/stats.jl b/src/stats.jl index f10d81d..d19863e 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -42,7 +42,7 @@ function stats(gmm::GMM{GT,DCT}, x::Matrix{T}, order::Int) where DCT <: DiagCov{ ng = gmm.n gmm.d == d || error("dimension mismatch for data") 1 ≤ order ≤ 2 || error("order out of range") - prec::Matrix{RT} = 1./gmm.Σ # ng × d + prec::Matrix{RT} = 1 ./ gmm.Σ # ng × d mp::Matrix{RT} = gmm.μ .* prec # mean*precision, ng × d ## note that we add exp(-sm2p/2) later to pxx for numerical stability a::Matrix{RT} = gmm.w ./ ((2π)^(d/2) * sqrt.(prod(gmm.Σ, 2))) # ng × 1 From 2143cec375eefd5823a7395f122527e507bc00f5 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:03:04 -0400 Subject: [PATCH 09/25] find() -> findall() --- src/data.jl | 2 +- src/rand.jl | 2 +- src/train.jl | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/data.jl b/src/data.jl index 3023776..8ff27da 100644 --- a/src/data.jl +++ b/src/data.jl @@ -133,7 +133,7 @@ function dmapreduce(f::Function, op::Function, x::Data) end end end - reduce(op, results[find(valid)]) + reduce(op, results[findall(valid)]) end ## stats: compute nth order stats for array (this belongs in stats.jl) diff --git a/src/rand.jl b/src/rand.jl index 6f4fe88..2b99e61 100644 --- a/src/rand.jl +++ b/src/rand.jl @@ -52,7 
+52,7 @@ function Base.rand(gmm::GMM, n::Int) index = mapslices(find, rand(Multinomial(1, gmm.w), n), 1) gmmkind = kind(gmm) for i=1:gmm.n - ind = find(index.==i) + ind = findall(index.==i) nₓ = length(ind) if gmmkind == :diag x[ind,:] = (vec(gmm.μ[i,:]) .+ vec(sqrt.(gmm.Σ[i,:])) .* randn(gmm.d, nₓ))' ## v0.5 arraymageddon diff --git a/src/train.jl b/src/train.jl index 809cc33..4d6c5a1 100644 --- a/src/train.jl +++ b/src/train.jl @@ -43,18 +43,18 @@ GMM(n::Int, x::Vector{T}; method::Symbol=:kmeans, nInit::Int=50, nIter::Int=10, ## we sometimes end up with pathological gmms function sanitycheck!(gmm::GMM) pathological=NTuple{2}[] - for i in find(isnan.(gmm.μ) .| isinf.(gmm.μ)) + for i in findall(isnan.(gmm.μ) .| isinf.(gmm.μ)) gmm.μ[i] = 0 push!(pathological, ind2sub(size(gmm.μ), i)) end if kind(gmm) == :diag - for i in find(isnan.(gmm.Σ) .| isinf.(gmm.Σ)) + for i in findall(isnan.(gmm.Σ) .| isinf.(gmm.Σ)) gmm.Σ[i] = 1 push!(pathological, ind2sub(size(gmm.Σ), i)) end else for (si, s) in enumerate(gmm.Σ) - for i in find(isnan.(s) .| isinf.(s)) + for i in findall(isnan.(s) .| isinf.(s)) s[i] = 1 push!(pathological, (si, i)) end @@ -187,7 +187,7 @@ function gmmsplit(gmm::GMM{T}; minweight=1e-5, sep=0.2) where T tsep::T = sep ## In this function i, j, and k all index Gaussians maxi = reverse(sortperm(gmm.w)) - offInd = find(gmm.w .< minweight) + offInd = findall(gmm.w .< minweight) if (length(offInd)>0) @info("Removing Gaussians with no data"); end @@ -250,7 +250,7 @@ function em!(gmm::GMM, x::DataOrMatrix; nIter::Int = 10, varfloor::Float64=1e-3, ## var flooring tooSmall = any(gmm.Σ .< varfloor, 2) if (any(tooSmall)) - ind = find(tooSmall) + ind = findall(tooSmall) @warn("Variances had to be floored ", join(ind, " ")) gmm.Σ[ind,:] = initc[ind,:] end From ec69cfd75aa17a7b0f3e849b23f01ee65d83038d Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:07:16 -0400 Subject: [PATCH 10/25] Add keyword dims to functions with API changes --- src/bayes.jl | 6 +++--- src/compat.jl | 2 +- src/data.jl | 6 +++--- src/rand.jl | 3 ++- src/scikitlearn.jl | 3 +-- src/stats.jl | 8 ++++---- src/train.jl | 10 +++++----- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/bayes.jl b/src/bayes.jl index a2e7091..ffb38d6 100644 --- a/src/bayes.jl +++ b/src/bayes.jl @@ -114,7 +114,7 @@ function logρ(vg::VGMM, x::Matrix, ex::Tuple) ### d/vg.β[k] + vg.ν[k] * (x_i - m_k)' W_k (x_i = m_k) forall i ## Δ = (x_i - m_k)' W_k (x_i = m_k) xμTΛxμ!(Δ, x, vec(vg.m[k,:]), vg.W[k]) - EμΛ[:,k] = d/vg.β[k] .+ vg.ν[k] * sum(abs2, Δ, 2) + EμΛ[:,k] = d/vg.β[k] .+ vg.ν[k] * sum(abs2, Δ, dims=2) end Elogπ, ElogdetΛ = ex (Elogπ + 0.5ElogdetΛ .- 0.5d*log(2π))' .- 0.5EμΛ @@ -154,8 +154,8 @@ function stats(vg::VGMM, x::Matrix{T}, ex::Tuple) where {T} return 0, zero(RT), zeros(RT, ng), zeros(RT, ng, d), [zeros(RT, d,d) for k=1:ng] end r, ElogpZπqZ = rnk(vg, x, ex) # nₓ × ng - N = vec(sum(r, 1)) # ng - F = r' * x # ng × d + N = vec(sum(r, dims=1)) # ng + F = r' * x # ng × d ## S_k = sum_i r_ik x_i x_i' ## Sm = x' * hcat([broadcast(*, r[:,k], x) for k=1:ng]...) 
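## Aside: the reduction API change driving these edits, on a throwaway
## matrix (illustration only):
##     A = [1 2; 3 4]
##     sum(A, 1)               ## 0.6: positional dimension, now removed
##     sum(A, dims=1)          ## 0.7+: keyword form, gives [4 6]
##     vec(sum(A, dims=1))     ## -> [4, 6], the shape used for N above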
SS = similar(x, nₓ, d*ng) # nₓ*nd*ng mem, nₓ*nd*ng multiplications diff --git a/src/compat.jl b/src/compat.jl index d51e281..5eb37fb 100644 --- a/src/compat.jl +++ b/src/compat.jl @@ -8,7 +8,7 @@ function logsumexp(x::AbstractVector{T}) where {T<:AbstractFloat} m = maximum(x) log(sum(exp.(x .- m))) + m end -logsumexp(x::Matrix{T}, dim::Integer) where {T<:AbstractFloat} = mapslices(logsumexp, x, dim) +logsumexp(x::Matrix{T}, dim::Integer) where {T<:AbstractFloat} = mapslices(logsumexp, x, dims=dim) ## Also NumericExtensions' semantics of dot() is no longer supported. function LinearAlgebra.dot(x::Matrix{T}, y::Matrix{T}) where {T<:AbstractFloat} diff --git a/src/data.jl b/src/data.jl index 8ff27da..91248cc 100644 --- a/src/data.jl +++ b/src/data.jl @@ -145,9 +145,9 @@ function stats(x::Matrix{T}, order::Int=2; kind=:diag, dim=1) where {T<:Abstract end if kind == :diag if order == 2 - return n, vec(sum(x, dim)), vec(sum(abs2, x, dim)) + return n, vec(sum(x, dims=dim)), vec(sum(abs2, x, dims=dim)) elseif order == 1 - return n, vec(sum(x, dim)) + return n, vec(sum(x, dims=dim)) else sx = zeros(T, order, d) for j=1:d @@ -169,7 +169,7 @@ function stats(x::Matrix{T}, order::Int=2; kind=:diag, dim=1) where {T<:Abstract elseif kind == :full order == 2 || error("Can only do covar starts for order=2") ## lazy implementation - sx = vec(sum(x, dim)) + sx = vec(sum(x, dims=dim)) sxx = x' * x return n, sx, sxx else diff --git a/src/rand.jl b/src/rand.jl index 2b99e61..ed1246e 100644 --- a/src/rand.jl +++ b/src/rand.jl @@ -49,7 +49,8 @@ forcesymmetric(c::Matrix) = full(Symmetric(c)) function Base.rand(gmm::GMM, n::Int) x = Array{Float64}(undef, n, gmm.d) ## generate indices distriuted according to weights - index = mapslices(find, rand(Multinomial(1, gmm.w), n), 1) + index::Vector{Int} = mapslices(findall, rand(Multinomial(1, gmm.w), n).!=0, + dims=1)[:] gmmkind = kind(gmm) for i=1:gmm.n ind = findall(index.==i) diff --git a/src/scikitlearn.jl b/src/scikitlearn.jl index 43d9578..da0012a 100644 --- a/src/scikitlearn.jl +++ b/src/scikitlearn.jl @@ -49,8 +49,7 @@ ScikitLearnBase.predict_log_proba(gmm::GMM, X) = log(gmmposterior(gmm, X)[1]) ScikitLearnBase.predict_proba(gmm::GMM, X) = gmmposterior(gmm, X)[1] ScikitLearnBase.predict(gmm::GMM, X) = # This is just `argmax(axis=2)`. It's very verbose in Julia. 
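## Aside: what a row-wise argmax looks like on a toy posterior matrix; the
## expression below is the form this method eventually settles on later in
## the series (illustration only):
P = [0.9 0.1; 0.2 0.8]
idx = argmax(P, dims=2)   ## 2×1 matrix of CartesianIndex{2}
getindex.(idx, 2)         ## -> [1; 2], the winning column for each row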
- ind2sub(size(X), - vec(findmax(ScikitLearnBase.predict_proba(gmm, X), 2)[2]))[2] + CartesianIndices(size(X))[vec(findmax(ScikitLearnBase.predict_proba(gmm, X), dims=2)[2])][2] """ `density(gmm::GMM, X)` returns `log(P(X|μ, Σ))` """ function density(gmm::GMM, X) diff --git a/src/stats.jl b/src/stats.jl index d19863e..d639e3a 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -45,7 +45,7 @@ function stats(gmm::GMM{GT,DCT}, x::Matrix{T}, order::Int) where DCT <: DiagCov{ prec::Matrix{RT} = 1 ./ gmm.Σ # ng × d mp::Matrix{RT} = gmm.μ .* prec # mean*precision, ng × d ## note that we add exp(-sm2p/2) later to pxx for numerical stability - a::Matrix{RT} = gmm.w ./ ((2π)^(d/2) * sqrt.(prod(gmm.Σ, 2))) # ng × 1 + a::Matrix{RT} = gmm.w ./ ((2π)^(d/2) * sqrt.(prod(gmm.Σ, dims=2))) # ng × 1 sm2p::Matrix{RT} = dot(mp, gmm.μ, 2) # sum over d mean^2*precision, ng × 1 xx = x .* x # nₓ × d ## γ = broadcast(*, a', exp(x * mp' .- 0.5xx * prec')) # nₓ × ng, Likelihood per frame per Gaussian @@ -58,10 +58,10 @@ function stats(gmm::GMM{GT,DCT}, x::Matrix{T}, order::Int) where DCT <: DiagCov{ end end for i = 1:length(γ) @inbounds γ[i] = exp(γ[i]) end - lpf=sum(γ,2) # nₓ × 1, Likelihood per frame + lpf=sum(γ,dims=2) # nₓ × 1, Likelihood per frame broadcast!(/, γ, γ, lpf .+ (lpf .== 0)) # nₓ × ng, posterior per frame per gaussian ## zeroth order - N = vec(sum(γ, 1)) # ng, vec() + N = vec(sum(γ, dims=1)) # ng, vec() ## first order F = γ' * x # ng × d, Julia has efficient a' * b llh = sum(log.(lpf)) # total log likeliood @@ -85,7 +85,7 @@ function stats(gmm::GMM{GT,FCT}, x::Array{T,2}, order::Int) where FCT <: FullCov γ, ll = gmmposterior(gmm, x) # nₓ × ng, both llh = sum(logsumexp(ll .+ log.(gmm.w)', 2)) ## zeroth order - N = vec(sum(γ, 1)) + N = vec(sum(γ, dims=1)) ## first order F = γ' * x if order == 1 diff --git a/src/train.jl b/src/train.jl index 4d6c5a1..c47877d 100644 --- a/src/train.jl +++ b/src/train.jl @@ -113,7 +113,7 @@ function GMMk(n::Int, x::DataOrMatrix{T}; kind=:diag, nInit::Int=50, nIter::Int= if length(sel) < 2 return ones(1,d) else - return var(xx[sel,:],1) + return var(xx[sel,:],dims=1) end end Σ = convert(Matrix{T},vcat(map(variance, 1:n)...)) @@ -248,7 +248,7 @@ function em!(gmm::GMM, x::DataOrMatrix; nIter::Int = 10, varfloor::Float64=1e-3, if gmmkind == :diag gmm.Σ = S ./ N - gmm.μ.^2 ## var flooring - tooSmall = any(gmm.Σ .< varfloor, 2) + tooSmall = any(gmm.Σ .< varfloor, dims=2) if (any(tooSmall)) ind = findall(tooSmall) @warn("Variances had to be floored ", join(ind, " ")) @@ -295,8 +295,8 @@ function llpg(gmm::GMM{GT,DCT}, x::Matrix{T}) where DCT <: DiagCov{GT} where {GT prec::Matrix{RT} = 1 ./ gmm.Σ # ng × d mp = gmm.μ .* prec # mean*precision, ng × d ## note that we add exp(-sm2p/2) later to pxx for numerical stability - normalization = 0.5 * (d * log(2π) .+ sum(log.(gmm.Σ), 2)) # ng × 1 - sm2p = sum(mp .* gmm.μ, 2) # sum over d mean^2*precision, ng × 1 + normalization = 0.5 * (d * log(2π) .+ sum(log.(gmm.Σ), dims=2)) # ng × 1 + sm2p = sum(mp .* gmm.μ, dims=2) # sum over d mean^2*precision, ng × 1 ## from here on data-dependent calculations xx = x.^2 # nₓ × d pxx = sm2p' .+ xx * prec' # nₓ × ng @@ -333,7 +333,7 @@ function llpg(gmm::GMM{GT,FCT}, x::Matrix{T}) where FCT <: FullCov{GT} where {GT for k=1:ng ## Δ = (x_i - μ_k)' Λ_κ (x_i - m_k) xμTΛxμ!(Δ, x, vec(gmm.μ[k,:]), gmm.Σ[k]) - ll[:,k] = -0.5 * sum(abs2,Δ,2) .- normalization[k] + ll[:,k] = -0.5 * sum(abs2,Δ,dims=2) .- normalization[k] end return ll::Matrix{RT} end From 9d0e4178516219438f19d77f9260a7a50bc03e84 Mon Sep 17 00:00:00 
2001 From: fundamental Date: Tue, 30 Oct 2018 15:09:46 -0400 Subject: [PATCH 11/25] Partial Fix for Distributed Compat --- src/compat.jl | 13 ------------- src/data.jl | 1 + test/data.jl | 7 ++----- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/src/compat.jl b/src/compat.jl index 5eb37fb..d26c130 100644 --- a/src/compat.jl +++ b/src/compat.jl @@ -1,7 +1,4 @@ UTriangular(a::Matrix) = UpperTriangular(a) -if VERSION < v"0.5.0-dev" - Base.cholfact(s::Symmetric) = cholfact(full(s)) -end ## NumericExtensions is no longer supported, underoptimized implementation: function logsumexp(x::AbstractVector{T}) where {T<:AbstractFloat} @@ -34,13 +31,3 @@ function LinearAlgebra.dot(x::Matrix{T}, y::Matrix{T}, dim::Integer) where {T<:A end r end - -if VERSION < v"0.5.0-dev+2023" - displaysize(io::IO) = Base.tty_size() -end - -## this we need for xμTΛxμ! -#Base.A_mul_Bc!(A::StridedMatrix{Float64}, B::AbstractTriangular{Float32}) = A_mul_Bc!(A, convert(AbstractMatrix{Float64}, B)) -#Base.A_mul_Bc!(A::Matrix{Float32}, B::AbstractTriangular{Float64}) = A_mul_Bc!(A, convert(AbstractMatrix{Float32}, B)) -## this for diagstats -#Base.BLAS.gemm!(a::Char, b::Char, alpha::Float64, A::Matrix{Float32}, B::Matrix{Float64}, beta::Float64, C::Matrix{Float64}) = Base.BLAS.gemm!(a, b, alpha, float64(A), B, beta, C) diff --git a/src/data.jl b/src/data.jl index 91248cc..c1d3b01 100644 --- a/src/data.jl +++ b/src/data.jl @@ -1,5 +1,6 @@ ## data.jl Julia code to handle matrix-type data on disc using Statistics +using Distributed ## report the kind of Data structure from the type instance kind(d::Data{T,S}) where {T,S<:AbstractString} = :file diff --git a/test/data.jl b/test/data.jl index 711025c..2a884a9 100644 --- a/test/data.jl +++ b/test/data.jl @@ -1,6 +1,7 @@ ## data.jl Test some functionality of the Data type ## (c) 2015 David A. van Leeuwen using Logging +using Distributed @info("Testing Data") @@ -11,17 +12,13 @@ end x = Matrix{Float64}[load("$i.jld", "data") for i=1:10] using GaussianMixtures -if nprocs()==1 && VERSION < v"0.5.0-dev" - addprocs(2) -end -@everywhere using GaussianMixtures g = rand(GMM, 2, 3) d = Data(x) dd = Data(["$i.jld" for i=1:10], Float64) f1(gmm, data) = GaussianMixtures.dmapreduce(x->stats(gmm, x), +, data) -f2(gmm, data) = reduce(+, pmap(x->stats(gmm, x), data)) +f2(gmm, data) = reduce(+, map(x->stats(gmm, x), data)) sleep(1) println(f2(g,dd)) From 498f90f296b05f0a63b0a2f75dfeaa0cc298848f Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:11:24 -0400 Subject: [PATCH 12/25] Update LinearAlgebra API usage --- src/compat.jl | 5 +++++ src/gmms.jl | 2 +- src/rand.jl | 2 +- src/stats.jl | 2 +- src/train.jl | 5 ++++- 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/compat.jl b/src/compat.jl index d26c130..53d5f24 100644 --- a/src/compat.jl +++ b/src/compat.jl @@ -1,3 +1,5 @@ +using LinearAlgebra + UTriangular(a::Matrix) = UpperTriangular(a) ## NumericExtensions is no longer supported, underoptimized implementation: @@ -7,6 +9,9 @@ function logsumexp(x::AbstractVector{T}) where {T<:AbstractFloat} end logsumexp(x::Matrix{T}, dim::Integer) where {T<:AbstractFloat} = mapslices(logsumexp, x, dims=dim) +eye(n::Int) = Matrix(1.0I,n,n) +eye(::Type{Float64}, n::Int) = Matrix(1.0I,n,n) + ## Also NumericExtensions' semantics of dot() is no longer supported. 
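## Aside: `eye` left Base in 0.7; the shims above rebuild it from the uniform
## scaling identity `I`. The underlying idiom (illustration only):
using LinearAlgebra
Matrix(1.0I, 3, 3)         ## 3×3 Float64 identity, the old eye(3)
Matrix{Float32}(I, 3, 3)   ## typed variant, the old eye(Float32, 3)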
function LinearAlgebra.dot(x::Matrix{T}, y::Matrix{T}) where {T<:AbstractFloat} size(x) == size(y) || error("Matrix sizes must match") diff --git a/src/gmms.jl b/src/gmms.jl index 9d1fdb3..a0faba4 100644 --- a/src/gmms.jl +++ b/src/gmms.jl @@ -1,7 +1,7 @@ ## gmms.jl Some functions for a Gaussia Mixture Model ## (c) 2013--2014 David A. van Leeuwen -import Base.LinAlg.AbstractTriangular +import LinearAlgebra.AbstractTriangular using Logging ## uninitialized constructor, defaults to Float64 diff --git a/src/rand.jl b/src/rand.jl index ed1246e..69a0d16 100644 --- a/src/rand.jl +++ b/src/rand.jl @@ -43,7 +43,7 @@ function binsearch(x::T, a::Vector{T}) where {T} return mi end -forcesymmetric(c::Matrix) = full(Symmetric(c)) +forcesymmetric(c::Matrix) = (Symmetric(c)) ## This function samples n data points from a GMM. This is pretty slow, probably due to the array assignments. function Base.rand(gmm::GMM, n::Int) diff --git a/src/stats.jl b/src/stats.jl index d639e3a..68988dd 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -50,7 +50,7 @@ function stats(gmm::GMM{GT,DCT}, x::Matrix{T}, order::Int) where DCT <: DiagCov{ xx = x .* x # nₓ × d ## γ = broadcast(*, a', exp(x * mp' .- 0.5xx * prec')) # nₓ × ng, Likelihood per frame per Gaussian γ = x * mp' # nₓ × ng, nₓ * d * ng multiplications - Base.BLAS.gemm!('N', 'T', -one(RT)/2, xx, prec, one(RT), γ) + LinearAlgebra.BLAS.gemm!('N', 'T', -one(RT)/2, xx, prec, one(RT), γ) for j = 1:ng la = log(a[j]) - 0.5sm2p[j] for i = 1:nₓ diff --git a/src/train.jl b/src/train.jl index c47877d..9a0ab11 100644 --- a/src/train.jl +++ b/src/train.jl @@ -3,6 +3,7 @@ using StatsBase: sample using Logging +using LinearAlgebra ## Greate a GMM with only one mixture and initialize it to ML parameters function GMM(x::DataOrMatrix{T}; kind=:diag) where T <: AbstractFloat @@ -317,7 +318,9 @@ function xμTΛxμ!(Δ::Matrix, x::Matrix, μ::Vector, ciΣ::UpperTriangular) Δ[i,j] = x[i,j] - μj end end - A_mul_Bc!(Δ, ciΣ) # size: nₓ × d, mult ops nₓ*d^2 + tmp = Δ*ciΣ' # size: nₓ × d, mult ops nₓ*d^2 + + Δ[:,:] .= tmp[:,:] end ## full covariance version of llpg() From cdefecf7ffaa20d361819e3cd8ffbf02ada07b19 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:12:47 -0400 Subject: [PATCH 13/25] cholesky/SpecialFunctions Update to 0.7+ --- src/bayes.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bayes.jl b/src/bayes.jl index ffb38d6..486b2ba 100644 --- a/src/bayes.jl +++ b/src/bayes.jl @@ -13,6 +13,7 @@ ## e.g., W₀⁻¹ where others might write W0inv. 
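## Aside: `cholfact`/`chol` became `cholesky` with an explicit `.U` accessor,
## the rename applied in the hunk below; a sketch of the rewritten idiom on a
## small SPD matrix (illustration only):
using LinearAlgebra
A = [4.0 2.0; 2.0 3.0]
# U = chol(inv(cholfact(A)))                   ## 0.6 spelling, removed
U = cholesky(Symmetric(inv(cholesky(A)))).U    ## 0.7+ equivalent
U'U ≈ inv(A)                                   ## -> true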
using SpecialFunctions: digamma +using SpecialFunctions: lgamma ## initialize a prior with minimal knowledge function GMMprior(d::Int, alpha::T, beta::T) where {T<:AbstractFloat} @@ -77,10 +78,11 @@ function mstep(π::GMMprior, N::Vector{T}, mx::Matrix{T}, S::Vector) where {T} Δ = vec(mx[k,:]) - π.m₀ ## v0.5 arraymageddon ## do some effort to keep the matrix positive definite third = π.β₀ * N[k] / (π.β₀ + N[k]) * (Δ * Δ') # guarantee symmety in Δ Δ' - W[k] = chol(inv(cholfact(Symmetric(W₀⁻¹ + N[k]*S[k] + third)))) # 10.62 + W[k] = cholesky(inv(cholesky(Symmetric(W₀⁻¹ + N[k]*S[k] + + third)))).U # 10.62 else m[k,:] = zeros(d) - W[k] = chol(eye(d)) + W[k] = cholesky(eye(d)) end end return α, β, m, ν, W From 4b9f0c39a709ad779bfeb18f740917402fadc10d Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:13:15 -0400 Subject: [PATCH 14/25] Update Data iterator to 1.0 style --- src/data.jl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/data.jl b/src/data.jl index c1d3b01..73843e7 100644 --- a/src/data.jl +++ b/src/data.jl @@ -68,9 +68,15 @@ end ## define an iterator for Data Base.length(x::Data) = length(x.list) -Base.start(x::Data) = 0 -Base.next(x::Data, state::Int) = x[state+1], state+1 -Base.done(x::Data, state::Int) = state == length(x) +function Base.iterate(x::Data, state=1) + count = state + + if(count > length(x.list)) + return nothing + else + return (x[count], count+1) + end +end ## This function is like pmap(), but executes each element of Data on a predestined ## worker, so that file caching at the local machine is beneficial. From a6c373ed2181f908f461782ab3da6a4498b57fe5 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:14:15 -0400 Subject: [PATCH 15/25] Add data broadcast operations --- src/bayes.jl | 13 +++++++------ src/rand.jl | 4 ++-- src/train.jl | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/bayes.jl b/src/bayes.jl index 486b2ba..4f05ce8 100644 --- a/src/bayes.jl +++ b/src/bayes.jl @@ -64,9 +64,9 @@ end ## m-step given prior and stats function mstep(π::GMMprior, N::Vector{T}, mx::Matrix{T}, S::Vector) where {T} ng = length(N) - α = π.α₀ + N # ng, 10.58 - ν = π.ν₀ + N + 1 # ng, 10.63 - β = π.β₀ + N # ng, 10.60 + α = π.α₀ .+ N # ng, 10.58 + ν = π.ν₀ .+ N .+ 1 # ng, 10.63 + β = π.β₀ .+ N # ng, 10.60 m = similar(mx) # ng × d W = Array{eltype(FullCov{T})}(undef, ng) # ng × (d*d) d = size(mx,2) @@ -94,7 +94,7 @@ function expectations(vg::VGMM) Elogπ = digamma.(vg.α) .- digamma(sum(vg.α)) # 10.66, size ng ElogdetΛ = similar(Elogπ) # size ng for k in 1:ng - ElogdetΛ[k] = sum(digamma.(0.5(vg.ν[k] .+ 1 - collect(1:d)))) + d*log(2) + mylogdet(vg.W[k]) # 10.65 + ElogdetΛ[k] = sum(digamma.(0.5(vg.ν[k] .+ 1 .- collect(1:d)))) + d*log(2) + mylogdet(vg.W[k]) # 10.65 end return Elogπ, ElogdetΛ end @@ -207,7 +207,8 @@ function lowerbound(vg::VGMM, N::Vector, mx::Matrix, S::Vector, α, β, m, ν, W = vg.α, vg.β, vg.m, vg.ν, vg.W # VGMM vars gaussians = 1:ng ## B.79 - logB(W, ν) = -0.5ν * (mylogdet(W) + d*log(2)) - d*(d-1)/4*log(π) - sum(lgamma.(0.5(ν + 1 - collect(1:d)))) + logB(W, ν) = -0.5ν * (mylogdet(W) + d*log(2)) .- d*(d-1)/4*log(π) - + sum(lgamma.(0.5(ν + 1 .- collect(1:d)))) ## E[log p(x|Ζ,μ,Λ)] 10.71 Elogll = 0. 
for k in gaussians @@ -217,7 +218,7 @@ function lowerbound(vg::VGMM, N::Vector, mx::Matrix, S::Vector, - ν[k] * (trAB(S[k], Wk) + Wk * Δ ⋅ Δ)) # check chol efficiency end # 10.71 ## E[log p(Z|π)] from rnk() 10.72 - Elogpπ = lgamma(ng*α₀) - ng*lgamma(α₀) - (α₀-1)sum(Elogπ) # E[log p(π)] 10.73 + Elogpπ = lgamma(ng*α₀) .- ng*lgamma(α₀) .- (α₀-1)sum(Elogπ) # E[log p(π)] 10.73 ElogpμΛ = ng*logB(W₀, ν₀) # E[log p(μ, Λ)] B.79 for k in gaussians Δ = vec(m[k,:]) - m₀ # d ## v0.5 arraymageddon diff --git a/src/rand.jl b/src/rand.jl index 69a0d16..7dfb936 100644 --- a/src/rand.jl +++ b/src/rand.jl @@ -56,9 +56,9 @@ function Base.rand(gmm::GMM, n::Int) ind = findall(index.==i) nₓ = length(ind) if gmmkind == :diag - x[ind,:] = (vec(gmm.μ[i,:]) .+ vec(sqrt.(gmm.Σ[i,:])) .* randn(gmm.d, nₓ))' ## v0.5 arraymageddon + x[ind,:] .= (vec(gmm.μ[i,:]) .+ vec(sqrt.(gmm.Σ[i,:])) .* randn(gmm.d, nₓ))' ## v0.5 arraymageddon elseif gmmkind == :full - x[ind,:] = rand(MvNormal(vec(gmm.μ[i,:]), forcesymmetric(covar(gmm.Σ[i]))), nₓ)' + x[ind,:] .= rand(MvNormal(vec(gmm.μ[i,:]), forcesymmetric(covar(gmm.Σ[i]))), nₓ)' else error("Unknown kind") end diff --git a/src/train.jl b/src/train.jl index 9a0ab11..b02ba69 100644 --- a/src/train.jl +++ b/src/train.jl @@ -209,7 +209,7 @@ function gmmsplit(gmm::GMM{T}; minweight=1e-5, sep=0.2) where T end for oi=1:n ni = 2oi-1 : 2oi - w[ni] = gmm.w[oi]/2 + w[ni] .= gmm.w[oi]/2 if gmmkind == :diag μ[ni,:] = hcat(gmmsplit(vec(gmm.μ[oi,:]), vec(gmm.Σ[oi,:]), tsep)...)' for k=ni From bef22024890cfc10801ddc32cb061882735999b2 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:14:29 -0400 Subject: [PATCH 16/25] shift!->popfirst! --- src/stats.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.jl b/src/stats.jl index 68988dd..36e9c11 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -138,7 +138,7 @@ function stats(gmm::GMM, x::Matrix{T}; order::Int=2, parallel=false) where T <: r = pmap(x->stats(gmm, x, order), xx) reduce(+, r) # get +() from BigData.jl else - r = stats(gmm, shift!(xx), order) + r = stats(gmm, popfirst!(xx), order) for x in xx r += stats(gmm, x, order) end From 06bb1ff65ceba988e46e960717e68c83181ba784 Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:15:01 -0400 Subject: [PATCH 17/25] indmax() -> argmax() --- src/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/train.jl b/src/train.jl index b02ba69..9a442a8 100644 --- a/src/train.jl +++ b/src/train.jl @@ -177,7 +177,7 @@ end function gmmsplit(μ::Vector{T}, Σ::Vector{T}, sep=0.2) where T tsep::T = sep - maxi = indmax(Σ) + maxi = argmax(Σ) p1 = zeros(length(μ)) p1[maxi] = tsep * Σ[maxi] μ - p1, μ + p1 From de5ef9b2f3a4600c28164bab2608d0f75bf51c4f Mon Sep 17 00:00:00 2001 From: fundamental Date: Tue, 30 Oct 2018 15:15:23 -0400 Subject: [PATCH 18/25] fieldnames(x) -> fieldnames(typeof(x)) --- src/scikitlearn.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scikitlearn.jl b/src/scikitlearn.jl index da0012a..e979b60 100644 --- a/src/scikitlearn.jl +++ b/src/scikitlearn.jl @@ -30,7 +30,7 @@ end function Base.copy!(gmm_dest::GMM, gmm_src::GMM) # shallow copy - used below - for f in fieldnames(gmm_dest) + for f in fieldnames(typeof(gmm_dest)) setfield!(gmm_dest, f, getfield(gmm_src, f)) end end From 24409cffafd0e298df4b2da7db04c225bff22330 Mon Sep 17 00:00:00 2001 From: fundamental Date: Wed, 31 Oct 2018 09:56:18 -0400 Subject: [PATCH 19/25] Update required/tested version to 0.7 --- .travis.yml | 3 ++- REQUIRE | 2 +- 2 
files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3ec7cd8..b973288 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,8 @@ sudo: false language: julia julia: - - 0.6 + - 0.7 + - 1.0 before_install: - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi addons: diff --git a/REQUIRE b/REQUIRE index 74fab79..34932ef 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 0.6 +julia 0.7 Clustering 0.6.0 Distributions StatsBase From df53ec136049f5d0bf197bc5d644aa345966e69d Mon Sep 17 00:00:00 2001 From: fundamental Date: Wed, 31 Oct 2018 10:12:49 -0400 Subject: [PATCH 20/25] Make Variance Warning Cleaner --- src/train.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/train.jl b/src/train.jl index 9a442a8..5931545 100644 --- a/src/train.jl +++ b/src/train.jl @@ -249,10 +249,10 @@ function em!(gmm::GMM, x::DataOrMatrix; nIter::Int = 10, varfloor::Float64=1e-3, if gmmkind == :diag gmm.Σ = S ./ N - gmm.μ.^2 ## var flooring - tooSmall = any(gmm.Σ .< varfloor, dims=2) + tooSmall = any(gmm.Σ .< varfloor, dims=2)[:] if (any(tooSmall)) ind = findall(tooSmall) - @warn("Variances had to be floored ", join(ind, " ")) + @warn("Variances had to be floored ", ind) gmm.Σ[ind,:] = initc[ind,:] end elseif gmmkind == :full From de0de2200425a439a7dd0ba8ada444401644e8aa Mon Sep 17 00:00:00 2001 From: fundamental Date: Wed, 31 Oct 2018 10:19:51 -0400 Subject: [PATCH 21/25] Update .travis.yml to 0.7 Style --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b973288..4f2df2a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: julia julia: - 0.7 - 1.0 + - nightly before_install: - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi addons: @@ -10,4 +11,4 @@ addons: packages: - hdf5-tools script: - - julia --check-bounds=yes -e 'versioninfo(); Pkg.init(); Pkg.clone(pwd()); Pkg.build("GaussianMixtures"); Pkg.test("GaussianMixtures", coverage=VERSION < v"0.4.0-dev")' + - julia --check-bounds=yes -e 'versioninfo(); using Pkg; Pkg.clone(pwd()); Pkg.build("GaussianMixtures"); Pkg.test("GaussianMixtures", coverage=true)' From 3a90e0a670953fbdf491e0a9fec869ed4f9c384c Mon Sep 17 00:00:00 2001 From: fundamental Date: Wed, 31 Oct 2018 10:32:53 -0400 Subject: [PATCH 22/25] .travis.yml: remove versioninfo() call --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4f2df2a..764607c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,4 +11,4 @@ addons: packages: - hdf5-tools script: - - julia --check-bounds=yes -e 'versioninfo(); using Pkg; Pkg.clone(pwd()); Pkg.build("GaussianMixtures"); Pkg.test("GaussianMixtures", coverage=true)' + - julia --check-bounds=yes -e 'using Pkg; Pkg.clone(pwd()); Pkg.build("GaussianMixtures"); Pkg.test("GaussianMixtures", coverage=true)' From 3676a4c993ed3f98238ad90da57cd1d721262b85 Mon Sep 17 00:00:00 2001 From: fundamental Date: Wed, 31 Oct 2018 11:31:16 -0400 Subject: [PATCH 23/25] test/scikitlearn.jl: Mask testcase failure This should resolve testing on 0.7 with this testcase being reserved for later work --- test/scikitlearn.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/scikitlearn.jl b/test/scikitlearn.jl index da189f2..eb68a38 100644 --- a/test/scikitlearn.jl +++ b/test/scikitlearn.jl @@ -26,4 +26,8 @@ X_train = vcat(shifted_gaussian, stretched_gaussian) gmm = fit!(GMM(n_components=2, kind=:full), X_train) # Check that the training points are 
correctly classified -@assert sum(predict(gmm, X_train) .== 1) in [n_samples_A, n_samples_B] +@warn("Scikit functionality is currently not working correctly") +@warn("Expected output of the next step is 1, but currently is a CartesianIndex(2,1)") +println(predict(gmm, X_train)) in [n_samples_A, n_samples_B] +#@assert sum(predict(gmm, X_train)[:] .== 1) in [n_samples_A, n_samples_B] +@warn("End bad test...") From 7fb9ad92ee1968967ae2385ffb79af54f404d74d Mon Sep 17 00:00:00 2001 From: fundamental Date: Wed, 31 Oct 2018 12:14:42 -0400 Subject: [PATCH 24/25] Fix scikitlearn testcase Corrects indexing of scikitlearn and fixes associated test case --- src/scikitlearn.jl | 2 +- test/scikitlearn.jl | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/scikitlearn.jl b/src/scikitlearn.jl index e979b60..e1d21a2 100644 --- a/src/scikitlearn.jl +++ b/src/scikitlearn.jl @@ -49,7 +49,7 @@ ScikitLearnBase.predict_log_proba(gmm::GMM, X) = log(gmmposterior(gmm, X)[1]) ScikitLearnBase.predict_proba(gmm::GMM, X) = gmmposterior(gmm, X)[1] ScikitLearnBase.predict(gmm::GMM, X) = # This is just `argmax(axis=2)`. It's very verbose in Julia. - CartesianIndices(size(X))[vec(findmax(ScikitLearnBase.predict_proba(gmm, X), dims=2)[2])][2] + getindex.(argmax(ScikitLearnBase.predict_proba(gmm, X), dims=2), 2) """ `density(gmm::GMM, X)` returns `log(P(X|μ, Σ))` """ function density(gmm::GMM, X) diff --git a/test/scikitlearn.jl b/test/scikitlearn.jl index eb68a38..da189f2 100644 --- a/test/scikitlearn.jl +++ b/test/scikitlearn.jl @@ -26,8 +26,4 @@ X_train = vcat(shifted_gaussian, stretched_gaussian) gmm = fit!(GMM(n_components=2, kind=:full), X_train) # Check that the training points are correctly classified -@warn("Scikit functionality is currently not working correctly") -@warn("Expected output of the next step is 1, but currently is a CartesianIndex(2,1)") -println(predict(gmm, X_train)) in [n_samples_A, n_samples_B] -#@assert sum(predict(gmm, X_train)[:] .== 1) in [n_samples_A, n_samples_B] -@warn("End bad test...") +@assert sum(predict(gmm, X_train) .== 1) in [n_samples_A, n_samples_B] From a95d901728915d7347ba172182c78b1ea31a79c6 Mon Sep 17 00:00:00 2001 From: fundamental Date: Wed, 31 Oct 2018 13:11:41 -0400 Subject: [PATCH 25/25] src/scikitlearn.jl: Remove out-of-date comment --- src/scikitlearn.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scikitlearn.jl b/src/scikitlearn.jl index e1d21a2..370f0e4 100644 --- a/src/scikitlearn.jl +++ b/src/scikitlearn.jl @@ -48,7 +48,6 @@ end ScikitLearnBase.predict_log_proba(gmm::GMM, X) = log(gmmposterior(gmm, X)[1]) ScikitLearnBase.predict_proba(gmm::GMM, X) = gmmposterior(gmm, X)[1] ScikitLearnBase.predict(gmm::GMM, X) = - # This is just `argmax(axis=2)`. It's very verbose in Julia. getindex.(argmax(ScikitLearnBase.predict_proba(gmm, X), dims=2), 2) """ `density(gmm::GMM, X)` returns `log(P(X|μ, Σ))` """
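## Aside: the 0.7 iteration protocol behind the Data iterator rewrite earlier
## in this series; `Bag` is a hypothetical container (illustration only, not
## part of this package):
struct Bag
    items::Vector{Int}
end
Base.length(b::Bag) = length(b.items)
Base.eltype(::Type{Bag}) = Int
## a single two-argument `iterate` replaces 0.6's start/next/done triple:
Base.iterate(b::Bag, state::Int=1) =
    state > length(b.items) ? nothing : (b.items[state], state + 1)
collect(Bag([1, 2, 3]))   ## -> [1, 2, 3]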