Source code for hippylib.algorithms.bfgs

# Copyright (c) 2016-2018, The University of Texas at Austin 
# & University of California, Merced.
#
# All Rights reserved.
# See file COPYRIGHT for details.
#
# This file is part of the hIPPYlib library. For more information and source code
# availability see https://hippylib.github.io.
#
# hIPPYlib is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License (as published by the Free
# Software Foundation) version 2.0 dated June 1991.

from __future__ import absolute_import, division, print_function

import numpy as np

from ..modeling.variables import STATE, PARAMETER, ADJOINT
from ..utils.parameterList import ParameterList
from .NewtonCG import LS_ParameterList

def BFGSoperator_ParameterList():
    parameters = {}
    parameters["BFGS_damping"] = [0.2, "Damping of BFGS"]
    parameters["memory_limit"] = [np.inf, "Number of vectors to store in limited memory BFGS"]
    return ParameterList(parameters)

def BFGS_ParameterList():
    parameters = {}
    parameters["rel_tolerance"]       = [1e-6, "we converge when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
    parameters["abs_tolerance"]       = [1e-12, "we converge when sqrt(g,g) <= abs_tolerance"]
    parameters["gdm_tolerance"]       = [1e-18, "we converge when (g,dm) <= gdm_tolerance"]
    parameters["max_iter"]            = [500, "maximum number of iterations"]
    parameters["inner_rel_tolerance"] = [1e-9, "relative tolerance used for the solution of the forward, adjoint, and incremental (fwd,adj) problems"]
    parameters["globalization"]       = ["LS", "Globalization technique: line search (LS) or trust region (TR)"]
    parameters["print_level"]         = [0, "Control verbosity of printing screen"]
    parameters["GN_iter"]             = [5, "Number of Gauss Newton iterations before switching to Newton"]
    parameters["cg_coarse_tolerance"] = [.5, "Coarsest tolerance for the CG method (Eisenstat-Walker)"]
    ls_list = LS_ParameterList()
    ls_list["max_backtracking_iter"] = 25
    parameters["LS"]                  = [ls_list, "Sublist containing LS globalization parameters"]
    parameters["BFGS_op"]             = [BFGSoperator_ParameterList(), "BFGS operator"]
    return ParameterList(parameters)

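# Example (illustrative only, not part of this module): both factories return nested
# ParameterList objects, so individual entries can be overridden before the solver is
# constructed and inspected with showMe(), e.g.
#
#     parameters = BFGS_ParameterList()
#     parameters["max_iter"] = 100
#     parameters["BFGS_op"]["memory_limit"] = 20   # limited-memory variant
#     parameters.showMe()
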
class RescaledIdentity(object):
    """
    Default operator for :code:`H0inv`, corresponds to applying :math:`d0 I`
    """
    def __init__(self, init_vector=None):
        self.d0 = 1.0
        self._init_vector = init_vector
    def init_vector(self, x, dim):
        if self._init_vector:
            self._init_vector(x, dim)
        else:
            raise NotImplementedError("RescaledIdentity was constructed without an init_vector callable")
    def solve(self, x, b):
        x.zero()
        x.axpy(self.d0, b)

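# Usage note (illustrative only, not part of this module): RescaledIdentity is the
# fallback choice for the H0inv argument of BFGS.solve below, e.g.
#
#     H0inv = RescaledIdentity()
#
# Because it exposes a d0 attribute, BFGS_operator.update (further down) automatically
# rescales it from the earliest stored secant pair, d0 = (s0, y0) / (y0, y0).
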
class BFGS_operator:

    def __init__(self, parameters=BFGSoperator_ParameterList()):
        self.S, self.Y, self.R = [], [], []

        self.H0inv = None
        self.help = None
        self.update_scaling = True

        self.parameters = parameters
    def set_H0inv(self, H0inv):
        """
        Set the user-defined operator corresponding to :code:`H0inv`.

        Input:

            :code:`H0inv`: FEniCS operator with method :code:`solve()`
        """
        self.H0inv = H0inv
    def solve(self, x, b):
        """
        Solve the system :math:`H_{bfgs} x = b`,
        where :math:`H_{bfgs}` is the approximation to the Hessian built by BFGS.
        That is, we apply

        .. math::
            x = (H_{bfgs})^{-1} b = H_k b

        where the matrix :math:`H_k` is the BFGS approximation to the inverse of the Hessian.
        The computation is done via the double-loop (two-loop recursion) algorithm.

        Inputs:

            :code:`x = dolfin.Vector` - `[out]`

            :code:`b = dolfin.Vector` - `[in]`
        """
        A = []
        if self.help is None:
            self.help = b.copy()
        else:
            self.help.zero()
            self.help.axpy(1.0, b)

        for s, y, r in zip(reversed(self.S), reversed(self.Y), reversed(self.R)):
            a = r * s.inner(self.help)
            A.append(a)
            self.help.axpy(-a, y)

        self.H0inv.solve(x, self.help)     # x = H0 * x_copy

        for s, y, r, a in zip(self.S, self.Y, self.R, reversed(A)):
            b = r * y.inner(x)
            x.axpy(a - b, s)
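    # Illustrative dense-NumPy sketch of the same two-loop recursion (not part of
    # hIPPYlib; S, Y, R are lists of numpy arrays / floats and H0 is a callable
    # applying the initial inverse-Hessian approximation):
    #
    #     def two_loop_recursion(b, S, Y, R, H0=lambda v: v.copy()):
    #         q = b.copy()
    #         A = []
    #         for s, y, r in zip(reversed(S), reversed(Y), reversed(R)):   # newest to oldest
    #             a = r * s.dot(q)
    #             A.append(a)
    #             q -= a * y
    #         x = H0(q)                                                    # apply H0
    #         for s, y, r, a in zip(S, Y, R, reversed(A)):                 # oldest to newest
    #             beta = r * y.dot(x)
    #             x += (a - beta) * s
    #         return x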
    def update(self, s, y):
        """
        Update the BFGS operator with the most recent gradient update.

        To handle a potential break of the secant condition, the update is damped.

        Inputs:

            :code:`s = dolfin.Vector` `[in]` - corresponds to the update in the medium parameters.

            :code:`y = dolfin.Vector` `[in]` - corresponds to the update in the gradient.
        """
        damp = self.parameters["BFGS_damping"]
        memlim = self.parameters["memory_limit"]
        if self.help is None:
            self.help = y.copy()
        else:
            self.help.zero()

        sy = s.inner(y)
        self.solve(self.help, y)
        yHy = y.inner(self.help)
        theta = 1.0
        if sy < damp*yHy:
            theta = (1.0-damp)*yHy/(yHy-sy)
            s *= theta
            s.axpy(1-theta, self.help)
            sy = s.inner(y)
        assert(sy > 0.)
        rho = 1./sy
        self.S.append(s.copy())
        self.Y.append(y.copy())
        self.R.append(rho)

        # if L-BFGS
        if len(self.S) > memlim:
            self.S.pop(0)
            self.Y.pop(0)
            self.R.pop(0)
            self.update_scaling = True

        # re-scale H0 based on earliest secant information
        if hasattr(self.H0inv, "d0") and self.update_scaling:
            s0 = self.S[0]
            y0 = self.Y[0]
            d0 = s0.inner(y0) / y0.inner(y0)
            self.H0inv.d0 = d0
            self.update_scaling = False

        return theta

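# The damped update above mirrors the damped (L-)BFGS procedure of Nocedal & Wright,
# ch. 18.3: with H denoting the current inverse-Hessian approximation, it uses
#
#     theta = 1                                            if (s, y) >= damp * (y, H y)
#     theta = (1 - damp) * (y, H y) / ((y, H y) - (s, y))   otherwise
#
#     s <- theta * s + (1 - theta) * H y
#
# so that the curvature (s, y) stays positive and rho = 1 / (s, y) is well defined.
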
class BFGS:
    """
    Implement the BFGS technique with backtracking inexact line search and damped updating.
    See `Nocedal & Wright (06), ch.6.2, ch.7.3, ch.18.3`.

    The user must provide a model that describes the forward problem, cost functionals,
    and all the derivatives for the gradient and the Hessian.

    More specifically the model object should implement the following methods:

       - :code:`generate_vector()` -> generate the object containing state, parameter, adjoint
       - :code:`cost(x)` -> evaluate the cost functional, report regularization part and misfit separately
       - :code:`solveFwd(out, x, tol)` -> solve the possibly non-linear forward problem up to a tolerance :code:`tol`
       - :code:`solveAdj(out, x, tol)` -> solve the linear adjoint problem
       - :code:`evalGradientParameter(x, out)` -> evaluate the gradient of the parameter and compute its norm
       - :code:`applyR(dm, out)` -> compute :code:`out` = :math:`R dm`
       - :code:`Rsolver()` -> a solver for the regularization term

    Type :code:`help(Model)` for additional information.
    """
    termination_reasons = [
                           "Maximum number of Iteration reached",      #0
                           "Norm of the gradient less than tolerance", #1
                           "Maximum number of backtracking reached",   #2
                           "Norm of (g, da) less than tolerance"       #3
                           ]

    def __init__(self, model, parameters=BFGS_ParameterList()):
        """
        Initialize the BFGS solver.
        Type :code:`BFGS_ParameterList().showMe()` for default parameters and their description.
        """
        self.model = model
        self.parameters = parameters

        self.it = 0
        self.converged = False
        self.ncalls = 0
        self.reason = 0
        self.final_grad_norm = 0

        self.BFGSop = BFGS_operator(self.parameters["BFGS_op"])
    def solve(self, x, H0inv, bounds_xPARAM=None):
        """
        Solve the constrained optimization problem with initial guess :code:`x = [u, m0, p]`.

        .. note:: :code:`u` and :code:`p` may be :code:`None`.

        :code:`x` will be overwritten.

        :code:`H0inv`: the initial approximated inverse of the Hessian for the BFGS operator.
        It has an optional method :code:`update(x)` that will update the operator based on :code:`x = [u,m,p]`.

        :code:`bounds_xPARAM`: Bound constraint (list with two entries: min and max).
        Can be either a scalar value or a :code:`dolfin.Vector`.

        Return the solution :code:`[u,m,p]`.
        """
        if bounds_xPARAM is not None:
            if hasattr(bounds_xPARAM[0], "get_local"):
                param_min = bounds_xPARAM[0].get_local()    # Assume it is a dolfin vector
            else:
                param_min = bounds_xPARAM[0]*np.ones_like(x[PARAMETER].get_local())  # Assume it is a scalar
            if hasattr(bounds_xPARAM[1], "get_local"):
                param_max = bounds_xPARAM[1].get_local()    # Assume it is a dolfin vector
            else:
                param_max = bounds_xPARAM[1]*np.ones_like(x[PARAMETER].get_local())  # Assume it is a scalar

        rel_tol = self.parameters["rel_tolerance"]
        abs_tol = self.parameters["abs_tolerance"]
        max_iter = self.parameters["max_iter"]
        innerTol = self.parameters["inner_rel_tolerance"]
        ls_list = self.parameters[self.parameters["globalization"]]
        c_armijo = ls_list["c_armijo"]
        max_backtracking_iter = ls_list["max_backtracking_iter"]
        print_level = self.parameters["print_level"]

        self.BFGSop.parameters["BFGS_damping"] = self.parameters["BFGS_op"]["BFGS_damping"]
        self.BFGSop.parameters["memory_limit"] = self.parameters["BFGS_op"]["memory_limit"]
        self.BFGSop.set_H0inv(H0inv)

        if x[STATE] is None:
            x[STATE] = self.model.generate_vector(STATE)

        if x[ADJOINT] is None:
            x[ADJOINT] = self.model.generate_vector(ADJOINT)

        self.model.solveFwd(x[STATE], x, innerTol)

        self.it = 0
        self.converged = False
        self.ncalls += 1

        mhat = self.model.generate_vector(PARAMETER)
        mg = self.model.generate_vector(PARAMETER)

        x_star = [None, None, None] + x[3::]
        x_star[STATE] = self.model.generate_vector(STATE)
        x_star[PARAMETER] = self.model.generate_vector(PARAMETER)

        cost_old, reg_old, misfit_old = self.model.cost(x)

        if print_level >= 0:
            print("\n {:3} {:15} {:15} {:15} {:15} {:14} {:14} {:14}".format(
                  "It", "cost", "misfit", "reg", "(g,dm)", "||g||L2", "alpha", "theta"))
            print("{:3d} {:15e} {:15e} {:15e} {:15} {:14} {:14} {:14}".format(
                  self.it, cost_old, misfit_old, reg_old, "", "", "", ""))

        while (self.it < max_iter) and (self.converged == False):
            self.model.solveAdj(x[ADJOINT], x, innerTol)

            if hasattr(self.BFGSop.H0inv, "setPoint"):
                self.BFGSop.H0inv.setPoint(x)

            mg_old = mg.copy()
            gradnorm = self.model.evalGradientParameter(x, mg)

            # Update BFGS
            if self.it > 0:
                s = mhat * alpha
                y = mg - mg_old
                theta = self.BFGSop.update(s, y)
            else:
                gradnorm_ini = gradnorm
                tol = max(abs_tol, gradnorm_ini*rel_tol)
                theta = 1.0

            # check if solution is reached
            if (gradnorm < tol) and (self.it > 0):
                self.converged = True
                self.reason = 1
                break

            self.it += 1

            # compute search direction with BFGS:
            self.BFGSop.solve(mhat, -mg)

            # backtracking line-search
            alpha = 1.0
            descent = 0
            n_backtrack = 0
            mg_mhat = mg.inner(mhat)

            while descent == 0 and n_backtrack < max_backtracking_iter:
                # update m and u
                x_star[PARAMETER].zero()
                x_star[PARAMETER].axpy(1., x[PARAMETER])
                x_star[PARAMETER].axpy(alpha, mhat)
                x_star[STATE].zero()
                x_star[STATE].axpy(1., x[STATE])
                if bounds_xPARAM is not None:
                    x_star[PARAMETER].set_local(np.maximum(x_star[PARAMETER].get_local(), param_min))
                    x_star[PARAMETER].set_local(np.minimum(x_star[PARAMETER].get_local(), param_max))
                    x_star[PARAMETER].apply("")

                self.model.solveFwd(x_star[STATE], x_star, innerTol)
                cost_new, reg_new, misfit_new = self.model.cost(x_star)

                # Check if armijo conditions are satisfied
                if (cost_new < cost_old + alpha * c_armijo * mg_mhat) or (-mg_mhat <= self.parameters["gdm_tolerance"]):
                    cost_old = cost_new
                    descent = 1
                    x[PARAMETER].zero()
                    x[PARAMETER].axpy(1., x_star[PARAMETER])
                    x[STATE].zero()
                    x[STATE].axpy(1., x_star[STATE])
                else:
                    n_backtrack += 1
                    alpha *= 0.5

            if print_level >= 0:
                print("{:3d} {:15e} {:15e} {:15e} {:15e} {:14e} {:14e} {:14e}".format(
                      self.it, cost_new, misfit_new, reg_new, mg_mhat, gradnorm, alpha, theta))

            if n_backtrack == max_backtracking_iter:
                self.converged = False
                self.reason = 2
                break

            if -mg_mhat <= self.parameters["gdm_tolerance"]:
                self.converged = True
                self.reason = 3
                break

        self.final_grad_norm = gradnorm
        self.final_cost = cost_new
        return x
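
# Example driver (illustrative sketch, not part of this module): `model` is a
# placeholder for a user-supplied object implementing the interface listed in the
# BFGS class docstring, and `m0` is a placeholder initial parameter vector.
#
#     solver = BFGS(model)
#     solver.parameters["rel_tolerance"] = 1e-6
#     solver.parameters["max_iter"] = 200
#
#     x = solver.solve([None, m0, None], H0inv=RescaledIdentity())
#
#     if solver.converged:
#         print("BFGS converged in", solver.it, "iterations")
#     print(BFGS.termination_reasons[solver.reason])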