# Gaussian Process Regression (for large data)

```julia
using AugmentedGaussianProcesses
const AGP = AugmentedGaussianProcesses
using Distributions
using Plots
```

We create a toy dataset with X ∈ [-20, 20] and y = 5 * sinc(x), corrupted by Gaussian noise with standard deviation σ = 0.01.

```julia
N = 1000
X = reshape((sort(rand(N)) .- 0.5) * 40.0, N, 1)
σ = 0.01

function latent(x)
    return 5.0 * sinc.(x)
end
Y = vec(latent(X) + σ * randn(N));
```

Visualization of the data:

```julia
scatter(X, Y; lab="")
```

## Gaussian noise

In this first example, we look at the effect of using inducing points compared to the true Gaussian process. For simplicity, we keep all inducing point locations and kernel parameters fixed.
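As a quick reminder of why inducing points help (this is the standard sparse variational GP bound of Titsias 2009 / Hensman et al. 2013; the notation here is generic, not AGP-specific): the posterior is summarized by $M$ inducing points $\mathbf{u}$, and training maximizes the evidence lower bound

$$\log p(\mathbf{y}) \ge \sum_{i=1}^{N} \mathbb{E}_{q(f_i)}\!\left[\log p(y_i \mid f_i)\right] - \mathrm{KL}\!\left(q(\mathbf{u}) \,\|\, p(\mathbf{u})\right),$$

which reduces the cost of inference from the $O(N^3)$ of an exact GP to $O(NM^2)$ with $M \ll N$, at the price of an approximation that improves as $M$ grows.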

```julia
Ms = [4, 8, 16, 32, 64];
```

Create an empty array of GPs

```julia
models = Vector{AbstractGP}(undef, length(Ms) + 1);
```

Choose a kernel

```julia
kernel = SqExponentialKernel(); # + PeriodicKernel()
```
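Other priors are possible: kernels from KernelFunctions.jl (which AGP builds on) can be rescaled and composed. A hypothetical variant, not used in the rest of this example:

```julia
# Hypothetical alternative kernel (not used below), assuming KernelFunctions.jl
# is available: an RBF with an explicit lengthscale plus a Matern-3/2 kernel.
using KernelFunctions: with_lengthscale, Matern32Kernel
alt_kernel = with_lengthscale(SqExponentialKernel(), 2.0) + Matern32Kernel()
```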

And run sparse GP regression with an increasing number of inducing points

```julia
for (index, num_inducing) in enumerate(Ms)
    @info "Training with $(num_inducing) points"
    m = SVGP(
        X,
        Y, # First arguments are the input and output
        kernel, # Kernel
        GaussianLikelihood(σ), # Likelihood used
        AnalyticVI(), # Inference used to solve the problem
        num_inducing; # Number of inducing points used
        optimiser=false, # Keep kernel parameters fixed
        Zoptimiser=false, # Keep inducing point locations fixed
    )
    @time train!(m, 100) # Train the model for 100 iterations
    models[index] = m # Save the model in the array
end
```

```
[ Info: Training with 4 points
  0.016326 seconds (9.37 k allocations: 15.620 MiB)
[ Info: Training with 8 points
  0.049235 seconds (9.37 k allocations: 25.657 MiB, 62.21% gc time)
[ Info: Training with 16 points
  0.048620 seconds (9.37 k allocations: 46.356 MiB, 48.49% gc time)
[ Info: Training with 32 points
  0.078698 seconds (9.37 k allocations: 90.384 MiB, 47.54% gc time)
[ Info: Training with 64 points
  0.157723 seconds (10.28 k allocations: 189.062 MiB, 36.65% gc time)
```

Train the model without any inducing points (no approximation):

```julia
@info "Training with full model"
mfull = GP(
    X,
    Y,
    kernel;
    noise=σ,
    opt_noise=false, # Keep the noise value fixed
    optimiser=false, # Keep kernel parameters fixed
)
@time train!(mfull, 5);
models[end] = mfull;
```

```
[ Info: Training with full model
  1.461951 seconds (236 allocations: 366.450 MiB, 9.44% gc time)
```

Create a grid and compute the predictions on it:

```julia
function compute_grid(model, n_grid=50)
    mins = -20
    maxs = 20
    x_grid = range(mins, maxs; length=n_grid) # Create a grid
    y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid
    return y_grid, sig_y_grid, x_grid
end;
```

Plot the data as a scatter plot, then overlay the model prediction and, for the sparse models, the inducing point locations:

```julia
function plotdata(X, Y)
    return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab="", size=(300, 500))
end

function plot_model(model, X, Y, title=nothing)
    n_grid = 100
    y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)
    title = if isnothing(title)
        (model isa SVGP ? "M = $(dim(model[1]))" : "full")
    else
        title
    end

    p = plotdata(X, Y)
    Plots.plot!(
        p,
        x_grid,
        y_grid;
        ribbon=2 * sqrt.(sig_y_grid), # Plot 2 standard deviations
        title=title,
        color="red",
        lab="",
        linewidth=3.0,
    )
    if model isa SVGP # Plot the inducing points as well
        Plots.plot!(
            p,
            vec(model.f[1].Z),
            zeros(dim(model.f[1]));
            msize=2.0,
            color="black",
            t=:scatter,
            lab="",
        )
    end
    return p
end;
```
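To make the comparison quantitative rather than purely visual, one could also measure how far each model's predictive mean is from the noise-free latent function. A minimal sketch (our addition, not part of the original example; it reuses the `latent` and `proba_y` calls from above):

```julia
# RMSE of each model's predictive mean against the noise-free latent function
# (a rough accuracy check, not part of the original example).
using Statistics: mean
x_test = reshape(collect(range(-20, 20; length=200)), :, 1)
f_true = vec(latent(x_test))
for (label, m) in zip([string.("M = ", Ms); "full"], models)
    y_pred, _ = proba_y(m, x_test) # predictive mean and variance
    @info label rmse = sqrt(mean(abs2, y_pred .- f_true))
end
```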

```julia
Plots.plot(
    plot_model.(models, Ref(X), Ref(Y))...;
    layout=(1, length(models)),
    size=(1000, 200),
) # Plot all models and combine the plots
```
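Everything above deliberately kept the kernel parameters and inducing point locations fixed. Both restrictions can be lifted; a hedged sketch, assuming that passing `true` enables AGP's default optimisers, mirroring the `false` used above to disable them:

```julia
# Sketch (assumption: `true` enables the default optimisers, just as `false`
# above disabled them): learn the kernel parameters and inducing point
# locations instead of keeping them fixed.
m_opt = SVGP(
    X, Y, kernel, GaussianLikelihood(σ), AnalyticVI(), 16;
    optimiser=true, # Optimise kernel parameters
    Zoptimiser=true, # Optimise inducing point locations
)
train!(m_opt, 100)
```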