Source code for hippylib.algorithms.NewtonCG

# Copyright (c) 2016-2018, The University of Texas at Austin 
# & University of California--Merced.
# Copyright (c) 2019-2020, The University of Texas at Austin 
# University of California--Merced, Washington University in St. Louis.
#
# All Rights reserved.
# See file COPYRIGHT for details.
#
# This file is part of the hIPPYlib library. For more information and source code
# availability see https://hippylib.github.io.
#
# hIPPYlib is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License (as published by the Free
# Software Foundation) version 2.0 dated June 1991.

import math
from ..utils.parameterList import ParameterList
from ..modeling.reducedHessian import ReducedHessian
from ..modeling.variables import STATE, PARAMETER, ADJOINT
from .cgsolverSteihaug import CGSolverSteihaug


def LS_ParameterList():
    """
    Generate a ParameterList for line search globalization.
    Type :code:`LS_ParameterList().showMe()` for default values and their descriptions.
    """
    parameters = {}
    parameters["c_armijo"]              = [1e-4, "Armijo constant for sufficient reduction"]
    parameters["max_backtracking_iter"] = [10, "Maximum number of backtracking iterations"]

    return ParameterList(parameters)
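
# Usage sketch (illustrative): the ParameterList returned above supports key-style
# access, so individual line search parameters can be inspected or overridden, e.g.
#
#   ls_param = LS_ParameterList()
#   ls_param.showMe()                        # print defaults and descriptions
#   ls_param["max_backtracking_iter"] = 25   # allow more backtracking steps
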
def TR_ParameterList():
    """
    Generate a ParameterList for trust region globalization.
    Type :code:`TR_ParameterList().showMe()` for default values and their descriptions.
    """
    parameters = {}
    parameters["eta"] = [0.05, "Reject step if (actual reduction)/(predicted reduction) < eta"]

    return ParameterList(parameters)
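
# The single trust region parameter can be overridden the same way (illustrative):
#
#   tr_param = TR_ParameterList()
#   tr_param["eta"] = 0.1                    # stricter step acceptance than the 0.05 default
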
def ReducedSpaceNewtonCG_ParameterList():
    """
    Generate a ParameterList for ReducedSpaceNewtonCG.
    Type :code:`ReducedSpaceNewtonCG_ParameterList().showMe()` for default values and their descriptions.
    """
    parameters = {}
    parameters["rel_tolerance"]       = [1e-6, "we converge when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
    parameters["abs_tolerance"]       = [1e-12, "we converge when sqrt(g,g) <= abs_tolerance"]
    parameters["gdm_tolerance"]       = [1e-18, "we converge when (g,dm) <= gdm_tolerance"]
    parameters["max_iter"]            = [20, "maximum number of iterations"]
    parameters["globalization"]       = ["LS", "Globalization technique: line search (LS) or trust region (TR)"]
    parameters["print_level"]         = [0, "Control verbosity of printing to screen"]
    parameters["GN_iter"]             = [5, "Number of Gauss-Newton iterations before switching to full Newton"]
    parameters["cg_coarse_tolerance"] = [.5, "Coarsest tolerance for the CG method (Eisenstat-Walker)"]
    parameters["cg_max_iter"]         = [100, "Maximum CG iterations"]
    parameters["LS"]                  = [LS_ParameterList(), "Sublist containing LS globalization parameters"]
    parameters["TR"]                  = [TR_ParameterList(), "Sublist containing TR globalization parameters"]

    return ParameterList(parameters)
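
# Configuration sketch (illustrative): build the full parameter list, tighten the
# gradient tolerance, and switch the globalization strategy before constructing the solver.
#
#   parameters = ReducedSpaceNewtonCG_ParameterList()
#   parameters["rel_tolerance"] = 1e-8
#   parameters["globalization"] = "TR"       # trust region instead of the default line search
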
class ReducedSpaceNewtonCG:

    """
    Inexact Newton-CG method to solve constrained optimization problems in the reduced parameter space.
    The Newton system is solved inexactly by early termination of CG iterations via the Eisenstat-Walker
    (to prevent oversolving) and Steihaug (to avoid negative curvature) criteria.
    Globalization is performed using one of the following methods:

    - line search (LS) based on the Armijo sufficient reduction condition; or
    - trust region (TR) based on the prior-preconditioned norm of the update direction.

    The stopping criterion is based on a control on the norm of the gradient and a control of the
    inner product between the gradient and the Newton direction.

    The user must provide a model that describes the forward problem, cost functionals, and all the
    derivatives for the gradient and the Hessian.

    More specifically, the model object should implement the following methods:

       - :code:`generate_vector()` -> generate the object containing state, parameter, adjoint
       - :code:`cost(x)` -> evaluate the cost functional, report regularization part and misfit separately
       - :code:`solveFwd(out, x)` -> solve the possibly nonlinear forward problem
       - :code:`solveAdj(out, x)` -> solve the linear adjoint problem
       - :code:`evalGradientParameter(x, out)` -> evaluate the gradient with respect to the parameter and compute its norm
       - :code:`setPointForHessianEvaluations(x)` -> set the state to perform Hessian evaluations
       - :code:`solveFwdIncremental(out, rhs)` -> solve the linearized forward problem for a given :code:`rhs`
       - :code:`solveAdjIncremental(out, rhs)` -> solve the linear adjoint problem for a given :code:`rhs`
       - :code:`applyC(dm, out)`   --> Compute out = :math:`C_x dm`
       - :code:`applyCt(dp, out)`  --> Compute out = :math:`C_x^T dp`
       - :code:`applyWuu(du, out)` --> Compute out = :math:`(W_{uu})_x du`
       - :code:`applyWum(dm, out)` --> Compute out = :math:`(W_{um})_x dm`
       - :code:`applyWmu(du, out)` --> Compute out = :math:`(W_{mu})_x du`
       - :code:`applyR(dm, out)`   --> Compute out = :math:`R dm`
       - :code:`applyWmm(dm, out)` --> Compute out = :math:`W_{mm} dm`
       - :code:`Rsolver()`         --> A solver for the regularization term

    Type :code:`help(Model)` for additional information.
    """
    termination_reasons = [
                           "Maximum number of iterations reached",               #0
                           "Norm of the gradient less than tolerance",           #1
                           "Maximum number of backtracking iterations reached",  #2
                           "Norm of (g, dm) less than tolerance"                 #3
                           ]

    def __init__(self, model, parameters=ReducedSpaceNewtonCG_ParameterList(), callback=None):
        """
        Initialize the ReducedSpaceNewtonCG solver.
        Type :code:`ReducedSpaceNewtonCG_ParameterList().showMe()` for a list of default parameters
        and their descriptions.

        Parameters:

        :code:`model` - the model object that describes the inverse problem

        :code:`parameters` - (type :code:`ParameterList`, optional) parameters for the inexact Newton-CG solver

        :code:`callback` - (type function handle with signature :code:`callback(it: int, x: list of dl.Vector) -> None`,
        optional) callback function called at the end of each iteration. Takes as input the iteration
        number and the list of vectors for the state, parameter, and adjoint.
        """
        self.model = model
        self.parameters = parameters

        self.it = 0
        self.converged = False
        self.total_cg_iter = 0
        self.ncalls = 0
        self.reason = 0
        self.final_grad_norm = 0

        self.callback = callback
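
    # Callback sketch (illustrative; `model` is any object implementing the interface
    # listed in the class docstring, and `monitor` is a hypothetical user function):
    #
    #   def monitor(it, x):
    #       print("iter", it, "parameter norm", x[PARAMETER].norm("l2"))
    #
    #   solver = ReducedSpaceNewtonCG(model, callback=monitor)
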
    def solve(self, x):
        """
        Input:
            :code:`x = [u, m, p]` represents the initial guess (:code:`u` and :code:`p` may be None).
            :code:`x` will be overwritten on return.
        """
        if self.model is None:
            raise TypeError("model cannot be None.")

        if x[STATE] is None:
            x[STATE] = self.model.generate_vector(STATE)

        if x[ADJOINT] is None:
            x[ADJOINT] = self.model.generate_vector(ADJOINT)

        if self.parameters["globalization"] == "LS":
            return self._solve_ls(x)
        elif self.parameters["globalization"] == "TR":
            return self._solve_tr(x)
        else:
            raise ValueError(self.parameters["globalization"])
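
    # Usage sketch (illustrative; `m0` is a hypothetical initial parameter vector,
    # e.g. obtained from model.generate_vector(PARAMETER)):
    #
    #   x = solver.solve([None, m0, None])   # state and adjoint are allocated internally
    #   if solver.converged:
    #       print(solver.termination_reasons[solver.reason])
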
    def _solve_ls(self, x):
        """
        Solve the constrained optimization problem with initial guess :code:`x`.
        """
        rel_tol = self.parameters["rel_tolerance"]
        abs_tol = self.parameters["abs_tolerance"]
        max_iter = self.parameters["max_iter"]
        print_level = self.parameters["print_level"]
        GN_iter = self.parameters["GN_iter"]
        cg_coarse_tolerance = self.parameters["cg_coarse_tolerance"]
        cg_max_iter = self.parameters["cg_max_iter"]

        c_armijo = self.parameters["LS"]["c_armijo"]
        max_backtracking_iter = self.parameters["LS"]["max_backtracking_iter"]

        self.model.solveFwd(x[STATE], x)

        self.it = 0
        self.converged = False
        self.ncalls += 1

        mhat = self.model.generate_vector(PARAMETER)
        mg = self.model.generate_vector(PARAMETER)

        x_star = [None, None, None] + x[3::]
        x_star[STATE] = self.model.generate_vector(STATE)
        x_star[PARAMETER] = self.model.generate_vector(PARAMETER)

        cost_old, _, _ = self.model.cost(x)

        while (self.it < max_iter) and (self.converged == False):
            self.model.solveAdj(x[ADJOINT], x)

            # Use the Gauss-Newton approximation of the Hessian for the first GN_iter iterations
            self.model.setPointForHessianEvaluations(x, gauss_newton_approx=(self.it < GN_iter))
            gradnorm = self.model.evalGradientParameter(x, mg)

            if self.it == 0:
                gradnorm_ini = gradnorm
                tol = max(abs_tol, gradnorm_ini*rel_tol)

            # check if solution is reached
            if (gradnorm < tol) and (self.it > 0):
                self.converged = True
                self.reason = 1
                break

            self.it += 1

            # Eisenstat-Walker forcing term: loosen the CG tolerance far from the solution
            tolcg = min(cg_coarse_tolerance, math.sqrt(gradnorm/gradnorm_ini))

            HessApply = ReducedHessian(self.model)
            solver = CGSolverSteihaug(comm=self.model.prior.R.mpi_comm())
            solver.set_operator(HessApply)
            solver.set_preconditioner(self.model.Rsolver())
            solver.parameters["rel_tolerance"] = tolcg
            solver.parameters["max_iter"] = cg_max_iter
            solver.parameters["zero_initial_guess"] = True
            solver.parameters["print_level"] = print_level-1

            solver.solve(mhat, -mg)
            self.total_cg_iter += HessApply.ncalls

            alpha = 1.0
            descent = 0
            n_backtrack = 0

            mg_mhat = mg.inner(mhat)

            while descent == 0 and n_backtrack < max_backtracking_iter:
                # update m and u
                x_star[PARAMETER].zero()
                x_star[PARAMETER].axpy(1., x[PARAMETER])
                x_star[PARAMETER].axpy(alpha, mhat)
                x_star[STATE].zero()
                x_star[STATE].axpy(1., x[STATE])
                self.model.solveFwd(x_star[STATE], x_star)

                cost_new, reg_new, misfit_new = self.model.cost(x_star)

                # Check if the Armijo conditions are satisfied
                if (cost_new < cost_old + alpha * c_armijo * mg_mhat) or (-mg_mhat <= self.parameters["gdm_tolerance"]):
                    cost_old = cost_new
                    descent = 1
                    x[PARAMETER].zero()
                    x[PARAMETER].axpy(1., x_star[PARAMETER])
                    x[STATE].zero()
                    x[STATE].axpy(1., x_star[STATE])
                else:
                    n_backtrack += 1
                    alpha *= 0.5

            if (print_level >= 0) and (self.it == 1):
                print("\n{0:3} {1:3} {2:15} {3:15} {4:15} {5:15} {6:14} {7:14} {8:14}".format(
                      "It", "cg_it", "cost", "misfit", "reg", "(g,dm)", "||g||L2", "alpha", "tolcg"))

            if print_level >= 0:
                print("{0:3d} {1:3d} {2:15e} {3:15e} {4:15e} {5:15e} {6:14e} {7:14e} {8:14e}".format(
                      self.it, HessApply.ncalls, cost_new, misfit_new, reg_new, mg_mhat, gradnorm, alpha, tolcg))

            if self.callback:
                self.callback(self.it, x)

            if n_backtrack == max_backtracking_iter:
                self.converged = False
                self.reason = 2
                break

            if -mg_mhat <= self.parameters["gdm_tolerance"]:
                self.converged = True
                self.reason = 3
                break

        self.final_grad_norm = gradnorm
        self.final_cost = cost_new
        return x
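
    # Worked example of the Eisenstat-Walker forcing term used in _solve_ls: with the
    # default cg_coarse_tolerance = 0.5, the first Newton step solves CG only to a
    # relative tolerance of 0.5; once the gradient norm has dropped to 1e-2 of its
    # initial value, tolcg = sqrt(1e-2) = 0.1, so the inner linear solves tighten
    # progressively as the iterates approach the solution.
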
    def _solve_tr(self, x):
        rel_tol = self.parameters["rel_tolerance"]
        abs_tol = self.parameters["abs_tolerance"]
        max_iter = self.parameters["max_iter"]
        print_level = self.parameters["print_level"]
        GN_iter = self.parameters["GN_iter"]
        cg_coarse_tolerance = self.parameters["cg_coarse_tolerance"]
        cg_max_iter = self.parameters["cg_max_iter"]

        eta_TR = self.parameters["TR"]["eta"]
        delta_TR = None

        self.model.solveFwd(x[STATE], x)

        self.it = 0
        self.converged = False
        self.ncalls += 1

        mhat = self.model.generate_vector(PARAMETER)
        R_mhat = self.model.generate_vector(PARAMETER)

        mg = self.model.generate_vector(PARAMETER)

        x_star = [None, None, None] + x[3::]
        x_star[STATE] = self.model.generate_vector(STATE)
        x_star[PARAMETER] = self.model.generate_vector(PARAMETER)

        cost_old, reg_old, misfit_old = self.model.cost(x)

        while (self.it < max_iter) and (self.converged == False):
            self.model.solveAdj(x[ADJOINT], x)

            self.model.setPointForHessianEvaluations(x, gauss_newton_approx=(self.it < GN_iter))
            gradnorm = self.model.evalGradientParameter(x, mg)

            if self.it == 0:
                gradnorm_ini = gradnorm
                tol = max(abs_tol, gradnorm_ini*rel_tol)

            # check if solution is reached
            if (gradnorm < tol) and (self.it > 0):
                self.converged = True
                self.reason = 1
                break

            self.it += 1

            # Eisenstat-Walker forcing term
            tolcg = min(cg_coarse_tolerance, math.sqrt(gradnorm/gradnorm_ini))

            HessApply = ReducedHessian(self.model)
            solver = CGSolverSteihaug(comm=self.model.prior.R.mpi_comm())
            solver.set_operator(HessApply)
            solver.set_preconditioner(self.model.Rsolver())
            if self.it > 1:
                solver.set_TR(delta_TR, self.model.prior.R)
            solver.parameters["rel_tolerance"] = tolcg
            solver.parameters["max_iter"] = cg_max_iter
            solver.parameters["zero_initial_guess"] = True
            solver.parameters["print_level"] = print_level-1

            solver.solve(mhat, -mg)
            self.total_cg_iter += HessApply.ncalls

            if self.it == 1:
                # Initialize the TR radius with the prior-weighted norm of the first Newton step
                self.model.prior.R.mult(mhat, R_mhat)
                mhat_Rnorm = R_mhat.inner(mhat)
                delta_TR = max(math.sqrt(mhat_Rnorm), 1)

            x_star[PARAMETER].zero()
            x_star[PARAMETER].axpy(1., x[PARAMETER])
            x_star[PARAMETER].axpy(1., mhat)   # m_star = m + mhat
            x_star[STATE].zero()
            x_star[STATE].axpy(1., x[STATE])   # u_star = u
            self.model.solveFwd(x_star[STATE], x_star)
            cost_star, reg_star, misfit_star = self.model.cost(x_star)
            ACTUAL_RED = cost_old - cost_star

            # Calculate predicted reduction
            H_mhat = self.model.generate_vector(PARAMETER)
            H_mhat.zero()
            HessApply.mult(mhat, H_mhat)
            mg_mhat = mg.inner(mhat)
            PRED_RED = -0.5*mhat.inner(H_mhat) - mg_mhat
            # print("PREDICTED REDUCTION", PRED_RED, "ACTUAL REDUCTION", ACTUAL_RED)
            rho_TR = ACTUAL_RED/PRED_RED

            # Nocedal and Wright Trust Region conditions (page 69)
            if rho_TR < 0.25:
                delta_TR *= 0.5
            elif rho_TR > 0.75 and solver.reasonid == 3:
                delta_TR *= 2.0

            # print("rho_TR", rho_TR, "eta_TR", eta_TR, "rho_TR > eta_TR?", rho_TR > eta_TR, "\n")
            if rho_TR > eta_TR:
                x[PARAMETER].zero()
                x[PARAMETER].axpy(1.0, x_star[PARAMETER])
                x[STATE].zero()
                x[STATE].axpy(1.0, x_star[STATE])
                cost_old = cost_star
                reg_old = reg_star
                misfit_old = misfit_star
                accept_step = True
            else:
                accept_step = False

            if self.callback:
                self.callback(self.it, x)

            if (print_level >= 0) and (self.it == 1):
                print("\n{0:3} {1:3} {2:15} {3:15} {4:15} {5:15} {6:14} {7:14} {8:14} {9:11} {10:14}".format(
                      "It", "cg_it", "cost", "misfit", "reg", "(g,dm)", "||g||L2", "TR Radius", "rho_TR", "Accept Step", "tolcg"))

            if print_level >= 0:
                print("{0:3d} {1:3d} {2:15e} {3:15e} {4:15e} {5:15e} {6:14e} {7:14e} {8:14e} {9:11} {10:14e}".format(
                      self.it, HessApply.ncalls, cost_old, misfit_old, reg_old, mg_mhat, gradnorm, delta_TR, rho_TR, accept_step, tolcg))

            # A small TR radius can make this term arbitrarily small and prematurely exit.
            if -mg_mhat <= self.parameters["gdm_tolerance"]:
                self.converged = True
                self.reason = 3
                break

        self.final_grad_norm = gradnorm
        self.final_cost = cost_old
        return x
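
    # Worked example of the Nocedal-Wright radius update used in _solve_tr: with the
    # default eta = 0.05, a step with rho_TR = 0.1 is accepted (0.1 > 0.05) but the
    # radius is halved because the quadratic model over-predicted the reduction
    # (0.1 < 0.25); a step with rho_TR = 0.9 whose CG solve also terminated on the
    # trust region boundary (solver.reasonid == 3) doubles the radius.
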