Florian Oswald Sciences Po, 2019
using Plots
using Optim
gr()
f(x) = exp(x) - x^4
minf(x) = -f(x)
brent = optimize(minf,0,2,Brent())
golden = optimize(minf,0,2,GoldenSection())
println("brent = $brent")
println("golden = $golden")
plot(f,0,2)
Roots.jl
IntervalRootFinding.jl
using Roots
#Â find the zeros of this function:
f(x) = exp(x) - x^4
## bracketing
fzero(f, 8, 9) # 8.613169456441398
fzero(f, -10, 0) # -0.8155534188089606
using IntervalRootFinding, IntervalArithmetic
-10..10
X = IntervalBox(1..3, 2..4)
a = @interval(0.1, 0.3)
b = @interval(0.3, 0.6)
a + b
rts = roots(x->x^2 - 2, -10..10, IntervalRootFinding.Bisection)
using Optim
using OptimTestProblems
for (name, prob) in MultivariateProblems.UnconstrainedProblems.examples
println(name)
end
rosenbrock = MultivariateProblems.UnconstrainedProblems.examples["Rosenbrock"]
# grid search on rosenbrock
grid = collect(-1.0:0.1:3);
grid2D = [[i;j] for i in grid,j in grid];
val2D = map(rosenbrock.f,grid2D);
r = findmin(val2D);
println("grid search results in minimizer = $(grid2D[r[2]])")
All descent methods follow more or less this structure. At iteration $k$,
# https://github.com/JuliaNLSolvers/LineSearches.jl
using LineSearches
algo_hz = Optim.Newton(linesearch = HagerZhang()) # Both Optim.jl and IntervalRootFinding.jl export `Newton`
res_hz = Optim.optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, rosenbrock.initial_x, method=algo_hz)
# Optim.jl has a TrustRegion for Newton (see below for Newton's Method)
NewtonTrustRegion(; initial_delta = 1.0, # The starting trust region radius
delta_hat = 100.0, # The largest allowable trust region radius
eta = 0.1, #When rho is at least eta, accept the step.
rho_lower = 0.25, # When rho is less than rho_lower, shrink the trust region.
rho_upper = 0.75) # When rho is greater than rho_upper, grow the trust region (though no greater than delta_hat).
res = Optim.optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, rosenbrock.initial_x, method=NewtonTrustRegion())
# Optim.jl again
GradientDescent(; alphaguess = LineSearches.InitialPrevious(),
linesearch = LineSearches.HagerZhang(),
P = nothing,
precondprep = (P, x) -> nothing)
# there is a dedicated LineSearch package: https://github.com/JuliaNLSolvers/LineSearches.jl
GD = optimize(rosenbrock.f, rosenbrock.g!,[0.0, 0.0],GradientDescent())
GD1 = optimize(rosenbrock.f, rosenbrock.g!,[0.0, 0.0],GradientDescent(),Optim.Options(iterations=5000))
GD2 = optimize(rosenbrock.f, rosenbrock.g!,[0.0, 0.0],GradientDescent(),Optim.Options(iterations=50000))
println("gradient descent = $GD")
println("\n")
println("gradient descent 2 = $GD1")
println("\n")
println("gradient descent 3 = $GD2")
optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, [0.0, 0.0], Optim.Newton(),Optim.Options(show_trace=true))
@show optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, [-1.0, 3.0], BFGS());
# low memory BFGS
@show optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, [0.0, 0.0], LBFGS());
# start to setup a basis function, i.e. unit vectors to index each direction:
basis(i, n) = [k == i ? 1.0 : 0.0 for k in 1 : n]
function cyclic_coordinate_descent(f, x, ε)
Δ, n = Inf, length(x)
while abs(Δ) > ε
x′ = copy(x)
for i in 1 : n
d = basis(i, n)
x = line_search(f, x, d)
end
Δ = norm(x - x′)
end
return x
end
function generalized_pattern_search(f, x, α, D, ε, γ=0.5)
y, n = f(x), length(x)
evals = 0
while α > ε
improved = false
for (i,d) in enumerate(D)
x′ = x + α*d
y′ = f(x′)
evals += 1
if y′ < y
x, y, improved = x′, y′, true
D = pushfirst!(deleteat!(D, i), d)
break
end
end
if !improved
α *= γ
end
end
println("$evals evaluations")
return x
end
D = [[1,0],[0,1],[-1,-0.5]]
D = [[1,0],[0,1]]
y=generalized_pattern_search(rosenbrock.f,zeros(2),0.8,D,1e-6 )
fmincon
and fminsearch
implements it.nm=optimize(rosenbrock.f, [0.0, 0.0], NelderMead());
nm.minimizer
Lagarias et al. (SIOPT, 1999): At present there is no function in any dimension greater than one, for which the original Nelder-Mead algorithm has been proved to converge to a minimizer.
Given all the known inefficiencies and failures of the Nelder-Mead algorithm [...], one might wonder why it is used at all, let alone why it is so extraordinarily popular.
$$\mathbf{x}^{(k+1)} \longleftarrow \mathbf{x}^{(k)} +\alpha^k\mathbf{g}^{(k)} + \mathbf{\varepsilon}^{(k)}$$
#Â f: function
# x: initial point
# T: transition distribution
#Â t: temp schedule, k_max: max iterations
function simulated_annealing(f, x, T, t, k_max)
y = f(x)
ytrace = zeros(typeof(y),k_max)
x_best, y_best = x, y
for k in 1 : k_max
x′ = x + rand(T)
y′ = f(x′)
Δy = y′ - y
if Δy ≤ 0 || rand() < exp(-Δy/t(k))
x, y = x′, y′
end
if y′ < y_best
x_best, y_best = x′, y′
end
ytrace[k] = y_best
end
return x_best,ytrace
end
function ackley(x, a=20, b=0.2, c=2Ï€)
d = length(x)
return -a*exp(-b*sqrt(sum(x.^2)/d)) - exp(sum(cos.(c*xi) for xi in x)/d) + a + exp(1)
end
using Plots
gr()
surface(-30:0.1:30,-30:0.1:30,(x,y)->ackley([x, y]),cbar=false)
p = Any[]
using Distributions
gr()
niters = 1000
temps = (1,10,25)
push!(p,[plot(x->i/x,1:1000,title = "tmp $i",lw=2,ylims = (0,1),leg = false) for i in (1,10,25)]...)
for sig in (1,5,25), t1 in (1,10,25)
y = simulated_annealing(ackley,[15,15],MvNormal(2,sig),x->t1/x,1000)[2][:]
push!(p,plot(y,title = "sig = $sig",leg=false,lw=1.5,color="red",ylims = (0,20)))
end
plot(p...,layout = (4,3))
Recall our core optimization problem:
$$ \min_{x\in\mathbb{R}^n} f(x) \text{ s.t. } x \in \mathcal{X} $$where both $f$ and $h$ have continuous partial derivatives.
In other words, we want to find the best $x$ s.t. $h(x) = 0$ and we have
$$ \nabla f(x) = \lambda \nabla h(x) $$for some Lagrange Mutliplier $\lambda$
Suppose we have
$$ \begin{aligned} \min_x & -\exp\left( -\left( x_1 x_2 - \frac{3}{2} \right)^2 - \left(x_2 - \frac{3}{2}\right)^2 \right) \\ \text{subject to } & x_1 - x_2^2 = 0 \end{aligned} $$We form the Lagrangiagn:
$$ \mathcal{L}(x_1,x_2,\lambda) = -\exp\left( -\left( x_1 x_2 - \frac{3}{2} \right)^2 - \left(x_2 - \frac{3}{2}\right)^2 \right) - \lambda(x_1 - x_2^2) $$Then we compute the gradient wrt to $x_1,x_2,\lambda$, set to zero and solve.
gr()
f(x1,x2) = -exp.(-(x1.*x2 - 3/2).^2 - (x2-3/2).^2)
c(x1) = sqrt(x1)
x=0:0.01:3.5
contour(x,x,(x,y)->f(x,y),lw=1.5,levels=[collect(0:-0.1:-0.85)...,-0.887,-0.95,-1])
plot!(c,0.01,3.5,label="",lw=2,color=:black)
scatter!([1.358],[1.165],markersize=5,markercolor=:red,label="Constr. Optimum")
Suppose now we had
$$ \begin{aligned} \min_\mathbf{x} & f(\mathbf{x}) \\ \text{subject to } & g(\mathbf{x}) \leq 0 \end{aligned} $$which, if the solution lies right on the constraint boundary, means that
$$ \nabla f - \mu \nabla g = 0 $$for some scalar $\mu$ - as before.
#Â the blue area shows the FEASIBLE SET
contour(x,x,(x,y)->f(x,y),lw=1.5,levels=[collect(0:-0.1:-0.85)...,-0.887,-0.95,-1])
plot!(c,0.01,3.5,label="",lw=2,color=:black,fill=(0,0.5,:blue))
scatter!([1.358],[1.165],markersize=5,markercolor=:red,label="Constr. Optimum")
#Â the blue area shows the FEASIBLE SET
#Â NOW THE CONSTRAINT IS INACTIVE OR SLACK!
c2(x1) = 1+sqrt(x1)
contour(x,x,(x,y)->f(x,y),lw=1.5,levels=[collect(0:-0.1:-0.85)...,-0.887,-0.95,-1])
plot!(c2,0.01,3.5,label="",lw=2,color=:black,fill=(0,0.5,:blue))
scatter!([1],[1.5],markersize=5,markercolor=:red,label="Unconstr. Optimum")
The preceding four conditions are called the Kuhn-Karush-Tucker (KKT) conditions. In the above order, and in general terms, they are:
The KKT conditions are the FONCs for problems with smooth constraints.
We can combine equality and inequality constraints:
$$ \mathcal{L}(\mathbf{x},\mathbf{\lambda},\mathbf{\mu}) = f(\mathbf{x}) + \sum_{i} \lambda_i h_i(\mathbf{x}) + \sum_j \mu_j g_j(\mathbf{x}) $$where, notice, we reverted the sign of $\lambda$ since this is unrestricted.
As one approaches the constraint boundary, the barrier function goes to infinity. Properties:
$p_\text{barrier}(\mathbf{x})$ is continuous
NLopt.jl
¶NLopt
expects contraints always to be formulated in the format
$$ g(x) \leq 0 $$
where $g$ is your constraint functionusing NLopt
count = 0 # keep track of # function evaluations
function myfunc(x::Vector, grad::Vector)
if length(grad) > 0
grad[1] = 0
grad[2] = 0.5/sqrt(x[2])
end
global count
count::Int += 1
println("f_$count($x)")
sqrt(x[2])
end
function myconstraint(x::Vector, grad::Vector, a, b)
if length(grad) > 0
grad[1] = 3a * (a*x[1] + b)^2
grad[2] = -1
end
(a*x[1] + b)^3 - x[2]
end
opt = Opt(:LD_MMA, 2)
lower_bounds!(opt, [-Inf, 0.])
xtol_rel!(opt,1e-4)
min_objective!(opt, myfunc)
inequality_constraint!(opt, (x,g) -> myconstraint(x,g,2,0), 1e-8)
inequality_constraint!(opt, (x,g) -> myconstraint(x,g,-1,1), 1e-8)
(minfunc,minx,ret) = NLopt.optimize(opt, [1.234, 5.678])
println("got $minfunc at $minx after $count iterations (returned $ret)")
NLopt
format, the constraint is $x_1 + x_2 - 0.8 \leq 0$function rosenbrockf(x::Vector,grad::Vector)
if length(grad) > 0
grad[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
grad[2] = 200.0 * (x[2] - x[1]^2)
end
return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
function r_constraint(x::Vector, grad::Vector)
if length(grad) > 0
grad[1] = 2*x[1]
grad[2] = 2*x[2]
end
return x[1]^2 + x[2]^2 - 0.8
end
opt = Opt(:LD_MMA, 2)
lower_bounds!(opt, [-5, -5.0])
min_objective!(opt,(x,g) -> rosenbrockf(x,g))
inequality_constraint!(opt, (x,g) -> r_constraint(x,g))
ftol_rel!(opt,1e-9)
NLopt.optimize(opt, [-1.0,0.0])
JuMP.jl
JuMP
uses MathProgBase.jl
, which converts your problem formulation into a standard representation of an optimization problem.using JuMP
using GLPK
model = Model(with_optimizer(GLPK.Optimizer))
@variable(model, 0 <= x <= 2)
@variable(model, 0 <= y <= 30)
# next, we set an objective function
@objective(model, Max, 5x + 3 * y)
# maybe add a constraint called "con":
@constraint(model, con, 1x + 5y <= 3);
JuMP
has a mathematical representation of our model internalizedMathProgBase
machinery knows now exactly how to translate that to different solver interfacesJuMP.optimize!(model)
# look at status
termination_status(model)
# we query objective value and solutions
@show objective_value(model)
@show value(x)
@show value(y)
# as well as the value of the dual variable on the constraint
@show dual(con);
For linear programs, a feasible dual on a
>=
constraint is nonnegative and a feasible dual on a<=
constraint is nonpositive
# helpfully, we have this, which is always positive:
shadow_price(con)
# JuMP: nonlinear Rosenbrock Example
# Instead of hand-coding first and second derivatives, you only have to give `JuMP` expressions for objective and constraints.
# Here is an example.
using Ipopt
let
m = Model(with_optimizer(Ipopt.Optimizer))
@variable(m, x)
@variable(m, y)
@NLobjective(m, Min, (1-x)^2 + 100(y-x^2)^2)
JuMP.optimize!(m)
@show value(x)
@show value(y)
@show termination_status(m)
end
# not bad, right?
# adding the constraint from before:
let
m = Model(with_optimizer(Ipopt.Optimizer))
@variable(m, x)
@variable(m, y)
@NLobjective(m, Min, (1-x)^2 + 100(y-x^2)^2)
@NLconstraint(m,x^2 + y^2 <= 0.8)
JuMP.optimize!(m)
@show value(x)
@show value(y)
@show termination_status(m)
end
JuMP
.# Copyright 2015, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# example modified
using Distributions
let
distrib = Normal(4.5,3.5)
n = 10000
data = rand(distrib,n);
m = Model(with_optimizer(Ipopt.Optimizer))
@variable(m, mu, start = 0.0)
@variable(m, sigma >= 0.0, start = 1.0)
@NLobjective(m, Max, -(n/2)*log(2Ï€*sigma^2)-sum((data[i] - mu) ^ 2 for i = 1:n)/(2*sigma^2))
JuMP.optimize!(m)
@show termination_status(m)
println("μ = ", value(mu),", mean(data) = ", mean(data))
println("σ^2 = ", value(sigma)^2, ", var(data) = ", var(data))
end
# ... to JuMP
# https://github.com/JuliaOpt/JuMP.jl/blob/release-0.19/examples/cannery.jl
# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#############################################################################
# JuMP
# An algebraic modeling language for Julia
# See http://github.com/JuliaOpt/JuMP.jl
#############################################################################
using JuMP, GLPK, Test
const MOI = JuMP.MathOptInterface
"""
example_cannery(; verbose = true)
JuMP implementation of the cannery problem from Dantzig, Linear Programming and
Extensions, Princeton University Press, Princeton, NJ, 1963.
Author: Louis Luangkesorn
Date: January 30, 2015
"""
function example_cannery(; verbose = true)
plants = ["Seattle", "San-Diego"]
markets = ["New-York", "Chicago", "Topeka"]
# Capacity and demand in cases.
capacity = [350, 600]
demand = [300, 300, 300]
# Distance in thousand miles.
distance = [2.5 1.7 1.8; 2.5 1.8 1.4]
# Cost per case per thousand miles.
freight = 90
num_plants = length(plants)
num_markets = length(markets)
cannery = Model(with_optimizer(GLPK.Optimizer))
@variable(cannery, ship[1:num_plants, 1:num_markets] >= 0)
# Ship no more than plant capacity
@constraint(cannery, capacity_con[i in 1:num_plants],
sum(ship[i,j] for j in 1:num_markets) <= capacity[i]
)
# Ship at least market demand
@constraint(cannery, demand_con[j in 1:num_markets],
sum(ship[i,j] for i in 1:num_plants) >= demand[j]
)
# Minimize transporatation cost
@objective(cannery, Min, sum(distance[i, j] * freight * ship[i, j]
for i in 1:num_plants, j in 1:num_markets)
)
JuMP.optimize!(cannery)
if verbose
println("RESULTS:")
for i in 1:num_plants
for j in 1:num_markets
println(" $(plants[i]) $(markets[j]) = $(JuMP.value(ship[i, j]))")
end
end
end
@test JuMP.termination_status(cannery) == MOI.OPTIMAL
@test JuMP.primal_status(cannery) == MOI.FEASIBLE_POINT
@test JuMP.objective_value(cannery) == 151200.0
end
example_cannery()
x = -3:0.01:3
dx = repeat(range(-3,stop = 3, length = 7),1,7)
contourf(x,x,(x,y)->x+y,color=:blues)
scatter!(dx,dx',legend=false,markercolor=:white)
plot!(x->sqrt(4-x^2),-2,2,c=:white)
plot!(x->-sqrt(4-x^2),-2,2,c=:white)
scatter!([-2,-1,0],[0,-1,-2],c=:red)
scatter!([-sqrt(2)],[-sqrt(2)],c=:red,markershape=:cross,markersize=9)
# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#############################################################################
# JuMP
# An algebraic modeling langauge for Julia
# See http://github.com/JuliaOpt/JuMP.jl
#############################################################################
# knapsack.jl
#
# Solves a simple knapsack problem:
# max sum(p_j x_j)
# st sum(w_j x_j) <= C
# x binary
#############################################################################
using JuMP, Cbc, LinearAlgebra
let
# Maximization problem
m = Model(with_optimizer(Cbc.Optimizer))
@variable(m, x[1:5], Bin)
profit = [ 5, 3, 2, 7, 4 ]
weight = [ 2, 8, 4, 2, 5 ]
capacity = 10
# Objective: maximize profit
@objective(m, Max, dot(profit, x))
# Constraint: can carry all
@constraint(m, dot(weight, x) <= capacity)
# Solve problem using MIP solver
optimize!(m)
println("Objective is: ", JuMP.objective_value(m))
println("Solution is:")
for i = 1:5
print("x[$i] = ", JuMP.value(x[i]))
println(", p[$i]/w[$i] = ", profit[i]/weight[i])
end
end