MATH06, Unconstrained multivariate optimization
Contents
- Gradient and Hessian
- Optimization process
- Suitable starting point for optimization
- Optimization code in general (easier switching between solvers)
Gradient and Hessian
import sympy
sympy.init_printing()
x1, x2 = sympy.symbols("x_1, x_2")
f_sym = (x1+10)**2 + 5 * (x2-9)**2 - 2*x1*x2
# Gradient
fprime_sym = [f_sym.diff(x_) for x_ in (x1, x2)]
Gradient = sympy.Matrix(fprime_sym)
# Hessian
fhess_sym = [[f_sym.diff(x1_, x2_) for x1_ in (x1, x2)] for x2_ in (x1, x2)]
Hessian = sympy.Matrix(fhess_sym)
print(Gradient, '\n', Hessian)
OUTPUT
: \(\left[\begin{matrix}2 x_{1} - 2 x_{2} + 20\\- 2 x_{1} + 10 x_{2} - 90\end{matrix}\right], \left[\begin{matrix}2 & -2\\-2 & 10\end{matrix}\right]\)
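Since the gradient must vanish at a stationary point, the minimizer of this quadratic can also be obtained symbolically by solving \(\nabla f = 0\). The following check is a minimal sketch reusing the symbols defined above (it is not part of the solver workflow); it also confirms that the constant Hessian is positive definite, so the stationary point is the global minimum.
# First-order condition: solve grad f = 0 symbolically (check only)
stationary_point = sympy.solve(fprime_sym, (x1, x2), dict=True)
print(stationary_point)        # expected: x_1 = -5/4, x_2 = 35/4
# The constant Hessian is positive definite (all eigenvalues > 0),
# so this stationary point is the global minimum
print(Hessian.eigenvals())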
Optimization process
The Newton-CG method with gradient and Hessian
from scipy import optimize
import sympy
sympy.init_printing()
import numpy as np
x1, x2 = sympy.symbols("x_1, x_2")
# Objective function
f_sym = (x1-1)**4 + 5 * (x2-1)**2 - 2*x1*x2
# Gradient
fprime_sym = [f_sym.diff(x_) for x_ in (x1, x2)]
# Hessian
fhess_sym = [[f_sym.diff(x1_, x2_) for x1_ in (x1, x2)] for x2_ in (x1, x2)]
# Convert sympy function to numpy function
f_lmbda = sympy.lambdify((x1, x2), f_sym, 'numpy')
fprime_lmbda = sympy.lambdify((x1, x2), fprime_sym, 'numpy')
fhess_lmbda = sympy.lambdify((x1, x2), fhess_sym, 'numpy')
# Unpacking for multivariate function
def func_XY_to_X_Y(f):
    '''Wrapper for f(X) -> f(X[0], X[1])'''
    return lambda X: np.array(f(X[0], X[1]))
f = func_XY_to_X_Y(f_lmbda)
fprime = func_XY_to_X_Y(fprime_lmbda)
fhess = func_XY_to_X_Y(fhess_lmbda)
# Optimization
optimize.fmin_ncg(f, (0, 0), fprime=fprime, fhess=fhess)
OUTPUT
Optimization terminated successfully.
Current function value: -3.867223
Iterations: 8
Function evaluations: 10
Gradient evaluations: 17
Hessian evaluations: 8
array([1.88292613, 1.37658523])
\(Optimal\ point\ :\ (x_{1},x_{2})=(1.88292613,1.37658523)\)
SUPPLEMENT
Gradient
sympy.Matrix(fprime_sym)
OUTPUT
: \(\left[\begin{matrix}- 2 x_{2} + 4 \left(x_{1} - 1\right)^{3}\\- 2 x_{1} + 10 x_{2} - 10\end{matrix}\right]\)
Hessian
sympy.Matrix(fhess_sym)
OUTPUT
: \(\left[\begin{matrix}12 \left(x_{1} - 1\right)^{2} & -2\\-2 & 10\end{matrix}\right]\)
VISUALIZATION
import matplotlib.pyplot as plt
x_opt = optimize.fmin_ncg(f, (0, 0), fprime=fprime, fhess=fhess)
x_ = y_ = np.linspace(-1, 4, 100)
X, Y = np.meshgrid(x_, y_)
fig, ax = plt.subplots(figsize=(6, 4))
c = ax.contour(X, Y, f_lmbda(X, Y), 100)
plt.colorbar(c, ax=ax)
ax.plot(x_opt[0], x_opt[1], 'r*', markersize=15)
ax.set_xlabel(r"$x_1$", fontsize=18)
ax.set_ylabel(r"$x_2$", fontsize=18)
plt.show()
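The same optimization can also be expressed through the unified optimize.minimize interface, which the last section of this post uses. A minimal sketch equivalent to the fmin_ncg call above (the gradient goes into jac and the Hessian into hess):
# Equivalent call via the unified minimize interface
result = optimize.minimize(f, (0, 0), method='Newton-CG', jac=fprime, hess=fhess)
print(result.x)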
The BFGS algorithm with gradient
from scipy import optimize
import sympy
sympy.init_printing()
import numpy as np
x1, x2 = sympy.symbols("x_1, x_2")
# Objective function
f_sym = (x1-1)**4 + 5 * (x2-1)**2 - 2*x1*x2
# Gradient
fprime_sym = [f_sym.diff(x_) for x_ in (x1, x2)]
# Convert sympy function to numpy function
f_lmbda = sympy.lambdify((x1, x2), f_sym, 'numpy')
fprime_lmbda = sympy.lambdify((x1, x2), fprime_sym, 'numpy')
# Unpacking for multivariate function
def func_XY_to_X_Y(f):
    '''Wrapper for f(X) -> f(X[0], X[1])'''
    return lambda X: np.array(f(X[0], X[1]))
f = func_XY_to_X_Y(f_lmbda)
fprime = func_XY_to_X_Y(fprime_lmbda)
# Optimization
optimize.fmin_bfgs(f, (0, 0), fprime=fprime)
OUTPUT
Optimization terminated successfully.
Current function value: -3.867223
Iterations: 9
Function evaluations: 13
Gradient evaluations: 13
array([1.88292645, 1.37658596])
\(Optimal\ point\ :\ (x_{1},x_{2})=(1.88292645,1.37658596)\)
SUPPLEMENT
Gradient
sympy.Matrix(fprime_sym)
OUTPUT
: \(\left[\begin{matrix}- 2 x_{2} + 4 \left(x_{1} - 1\right)^{3}\\- 2 x_{1} + 10 x_{2} - 10\end{matrix}\right]\)
VISUALIZATION
import matplotlib.pyplot as plt
x_opt = optimize.fmin_bfgs(f, (0, 0), fprime=fprime)
x_ = y_ = np.linspace(-1, 4, 100)
X, Y = np.meshgrid(x_, y_)
fig, ax = plt.subplots(figsize=(6, 4))
c = ax.contour(X, Y, f_lmbda(X, Y), 100)
plt.colorbar(c, ax=ax)
ax.plot(x_opt[0], x_opt[1], 'r*', markersize=15)
ax.set_xlabel(r"$x_1$", fontsize=18)
ax.set_ylabel(r"$x_2$", fontsize=18)
plt.show()
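As a quick sanity check, the gradient should (nearly) vanish at the point BFGS returns. A small sketch reusing fprime and x_opt from the code above:
# First-order optimality check at the BFGS solution
grad_at_opt = fprime(x_opt)
print(grad_at_opt)                               # each component close to 0
print(np.allclose(grad_at_opt, 0, atol=1e-4))    # True if within tolerance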
The nonlinear conjugate gradient algorithm with gradient
from scipy import optimize
import sympy
sympy.init_printing()
import numpy as np
x1, x2 = sympy.symbols("x_1, x_2")
# Objective function
f_sym = (x1-1)**4 + 5 * (x2-1)**2 - 2*x1*x2
# Gradient
fprime_sym = [f_sym.diff(x_) for x_ in (x1, x2)]
# Convert sympy function to numpy function
f_lmbda = sympy.lambdify((x1, x2), f_sym, 'numpy')
fprime_lmbda = sympy.lambdify((x1, x2), fprime_sym, 'numpy')
# Unpacking for multivariate function
def func_XY_to_X_Y(f):
    '''Wrapper for f(X) -> f(X[0], X[1])'''
    return lambda X: np.array(f(X[0], X[1]))
f = func_XY_to_X_Y(f_lmbda)
fprime = func_XY_to_X_Y(fprime_lmbda)
# Optimization
optimize.fmin_cg(f, (0, 0), fprime=fprime)
OUTPUT
Optimization terminated successfully.
Current function value: -3.867223
Iterations: 8
Function evaluations: 18
Gradient evaluations: 18
array([1.88292612, 1.37658523])
\(Optimal\ point\ :\ (x_{1},x_{2})=(1.88292612,1.37658523)\)
SUPPLEMENT
Gradient
sympy.Matrix(fprime_sym)
OUTPUT
: \(\left[\begin{matrix}- 2 x_{2} + 4 \left(x_{1} - 1\right)^{3}\\- 2 x_{1} + 10 x_{2} - 10\end{matrix}\right]\)
VISUALIZATION
import matplotlib.pyplot as plt
x_opt = optimize.fmin_cg(f, (0, 0), fprime=fprime)
x_ = y_ = np.linspace(-1, 4, 100)
X, Y = np.meshgrid(x_, y_)
fig, ax = plt.subplots(figsize=(6, 4))
c = ax.contour(X, Y, f_lmbda(X, Y), 100)
plt.colorbar(c, ax=ax)
ax.plot(x_opt[0], x_opt[1], 'r*', markersize=15)
ax.set_xlabel(r"$x_1$", fontsize=18)
ax.set_ylabel(r"$x_2$", fontsize=18)
plt.show()
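The printed summary can also be retrieved programmatically. A sketch assuming the legacy full_output flag of fmin_cg, which returns the optimum together with the final function value, the evaluation counts, and a warning flag:
# Retrieve solver statistics instead of parsing the printed summary
xopt, fopt, func_calls, grad_calls, warnflag = optimize.fmin_cg(
    f, (0, 0), fprime=fprime, full_output=True, disp=False)
print(xopt, fopt, warnflag)    # warnflag == 0 means successful convergence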
The BFGS algorithm without an analytical gradient
from scipy import optimize
import sympy
sympy.init_printing()
import numpy as np
x1, x2 = sympy.symbols("x_1, x_2")
# Objective function
f_sym = (x1-1)**4 + 5 * (x2-1)**2 - 2*x1*x2
# Convert sympy function to numpy function
f_lmbda = sympy.lambdify((x1, x2), f_sym, 'numpy')
# Unpacking for multivariate function
def func_XY_to_X_Y(f):
    '''Wrapper for f(X) -> f(X[0], X[1])'''
    return lambda X: np.array(f(X[0], X[1]))
f = func_XY_to_X_Y(f_lmbda)
# Optimization
optimize.fmin_bfgs(f, (0, 0))
OUTPUT
Optimization terminated successfully.
Current function value: -3.867223
Iterations: 9
Function evaluations: 52
Gradient evaluations: 13
array([1.88292645, 1.37658596])
\(Optimal\ point\ :\ (x_{1},x_{2})=(1.88292645,1.37658596)\)
VISUALIZATION
import matplotlib.pyplot as plt
x_opt = optimize.fmin_bfgs(f, (0, 0))
x_ = y_ = np.linspace(-1, 4, 100)
X, Y = np.meshgrid(x_, y_)
fig, ax = plt.subplots(figsize=(6, 4))
c = ax.contour(X, Y, f_lmbda(X, Y), 100)
plt.colorbar(c, ax=ax)
ax.plot(x_opt[0], x_opt[1], 'r*', markersize=15)
ax.set_xlabel(r"$x_1$", fontsize=18)
ax.set_ylabel(r"$x_2$", fontsize=18)
plt.show()
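Without an analytical gradient, fmin_bfgs estimates the gradient by finite differences, which is why the function-evaluation count rises from 13 to 52 for the same 13 gradient evaluations. The idea can be illustrated with optimize.approx_fprime (an illustration of the approach only, not necessarily the exact internal call):
# Finite-difference gradient estimate at the starting point (0, 0)
eps = np.sqrt(np.finfo(float).eps)
numerical_grad = optimize.approx_fprime(np.array([0.0, 0.0]), f, eps)
print(numerical_grad)    # each component costs extra evaluations of f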
Suitable starting point for optimization
import numpy as np
from scipy import optimize
# the objective function
def f(X):
    x, y = X
    return (4 * np.sin(np.pi * x) + 6 * np.sin(np.pi * y)) + (x - 1)**2 + (y - 1)**2
# find starting point
optimize.brute(f, (slice(-3, 5, 0.5), slice(-3, 5, 0.5)), finish=None)
OUTPUT
: \(Suitable\ starting\ point\ :\ (x,y) = (1.5,1.5)\)
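optimize.brute evaluates f on the rectangular grid defined by the slice objects (here a 0.5-spaced grid over [-3, 5) in each variable) and returns the best grid point; finish=None disables the default local polishing step. A sketch assuming the full_output flag, which also returns the objective value at that grid point:
# Also retrieve the objective value at the best grid point
x0, fval, grid, Jout = optimize.brute(
    f, (slice(-3, 5, 0.5), slice(-3, 5, 0.5)), full_output=True, finish=None)
print(x0, fval)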
Optimization process from suitable starting point
import numpy as np
from scipy import optimize
# the objective function
def f(X):
    x, y = X
    return (4 * np.sin(np.pi * x) + 6 * np.sin(np.pi * y)) + (x - 1)**2 + (y - 1)**2
# find starting point
x_start = optimize.brute(f, (slice(-3, 5, 0.5), slice(-3, 5, 0.5)), finish=None)
# optimization
optimize.fmin_bfgs(f, x_start)
OUTPUT
Optimization terminated successfully.
Current function value: -9.520229
Iterations: 4
Function evaluations: 28
Gradient evaluations: 7
array([1.47586906, 1.48365787])
\(Optimal\ point\ :\ (x,y) = (1.47586906,1.48365787)\)
VISUALIZATION
import matplotlib.pyplot as plt
x_opt = optimize.fmin_bfgs(f, x_start)
def func_X_Y_to_XY(f, X, Y):
    """
    Wrapper for f(X, Y) -> f([X, Y])
    """
    s = np.shape(X)
    return f(np.vstack([X.ravel(), Y.ravel()])).reshape(*s)
fig, ax = plt.subplots(figsize=(6, 4))
x_ = y_ = np.linspace(-3, 5, 100)
X, Y = np.meshgrid(x_, y_)
c = ax.contour(X, Y, func_X_Y_to_XY(f, X, Y), 25)
plt.colorbar(c, ax=ax)
ax.plot(x_opt[0], x_opt[1], 'r*', markersize=15)
ax.set_xlabel(r"$x_1$", fontsize=18)
ax.set_ylabel(r"$x_2$", fontsize=18)
plt.show()
Optimization code in general (easier switching between solvers)
import numpy as np
from scipy import optimize
# the objective function
def f(X):
    x, y = X
    return (4 * np.sin(np.pi * x) + 6 * np.sin(np.pi * y)) + (x - 1)**2 + (y - 1)**2
# find starting point
x_start = optimize.brute(f, (slice(-3, 5, 0.5), slice(-3, 5, 0.5)), finish=None)
# optimization
result = optimize.minimize(f, x_start, method='BFGS')  # the solver can be changed simply by swapping 'method'
result.x
OUTPUT
: \(Optimal\ point\ :\ (x,y) = (1.47586906,1.48365787)\)
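Because every solver is reached through the same minimize interface, switching algorithms only requires changing the method string (and supplying derivatives for the methods that need them, e.g. jac and hess for 'Newton-CG'). A minimal sketch with a derivative-free solver:
# Same problem, different solver: Nelder-Mead needs no derivatives at all
result_nm = optimize.minimize(f, x_start, method='Nelder-Mead')
print(result_nm.x, result_nm.success)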