//Scythe_Optimize.cc
//
// This file is the definition of the Optimizing functions
// for the SCYTHE project.  
//
// Scythe C++ Library
// Copyright (C) 2000 Kevin M. Quinn and Andrew D. Martin
//
// This code written by:
//
// Kevin Quinn
// Assistant Professor
// Dept. of Political Science and 
// Center for Statistics and the Social Sciences
// Box 354322
// University of Washington
// Seattle, WA  98195-4322
// quinn@stat.washington.edu
//
// Andrew D. Martin
// Assistant Professor
// Dept. of Political Science
// Campus Box 1063
// Washington University
// St. Louis, MO 63130
// admartin@artsci.wustl.edu
// 
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
// USA

#ifndef SCYTHE_OPTIMIZE_CC
#define SCYTHE_OPTIMIZE_CC

#include "Scythe_Optimize.h"

// Avoid NameSpace Pollution
namespace SCYTHE {

//  Notes: Algorithm taken from Sedgewick, Robert. 1992. Algorithms
//         in C++. Addison Wesley. pg. 561
//  4/23/2001 (KQ)

//! Calculate the Definite Integral of a function from \a a to \a b.
/*!
 * Calculate the Definite Integral of a function from \a a to \a b.
 * \param pfunc A pointer (of type double) to function. \a pfunc must
 * take a double and return a double.
 * \param a a double, the lower bound of integration.
 * \param b a double, the upper bound of integration.
 * \param n an integer, the number of subintervals.
 * \return Value of the Integral (type: double).
*/
double intsimp(double (*fun)(const double&), const double& a,
                const double& b, const int N){
   if (a > b){
     cerr << "ERROR 2001: Lower limit larger than upper limit in "
          << "SCYTHE::intsimp()" 
          << endl;
     exit(2001);
   }
   if (N<=0){
     cerr << "ERROR 2002: N non-positive in SCYTHE::intsimp()" 
          << endl;
     exit(2002);
   }
   double I = 0.0;
   double w = (b-a)/N;
   for (int i = 1; i <= N; i++){
     I += w*(fun(a+(i-1)*w) +
             4*fun(a-w/2+i*w) +
             fun(a+i*w))/6;
   }
   return I;
}

//  Notes: Algorithm taken from Sedgewick, Robert. 1992. Algorithms
//         in C++. Addison Wesley. pg. 562
// 4/23/2001 (KQ)

//! Calculate the Definite Integral of a function from \e a to \e b.
/*!
 * Calculate the Definite Integral of a function from \a a to \a b.
 * \param pfunc A pointer (of type double) to function. \a pfunc 
 * must take a double and return a double.
 * \param a a double, the lower bound of integration.
 * \param b a double, the upper bound of integration.
 * \param n an integer, the number of subintervals.
 * \return Value of the Integral (type: double).
*/
double adaptsimp(double (*fun)(const double&), const double& a, 
                 const double& b, const int N, const double tol){
   if (a > b){
     cerr << "ERROR 2003: lower limit larger than upper limit in "
          << "SCYTHE::adaptsimp()" 
          << endl;
     exit(2003);
   }
   if (N<=0){
     cerr << "ERROR 2004: N non-positive in SCYTHE::adaptsimp()" << endl;
     exit(2004);
   }

   double I = intsimp(fun, a, b, N);
   if (::fabs(I - intsimp(fun, a, b, N/2)) > tol)
     return adaptsimp(fun, a, (a+b)/2, N, tol) +
       adaptsimp(fun, (a+b)/2, b, N, tol);

   return I;
}



//  Notes: Algorithm taken from Nocedal and Wright. 1999. section 7.1
//         with some additional tricks from Press et al. NRC
// 4/25/2001 (KQ)
double donothing(const Matrix& x){return 0.0;};

//! Numerically calculate the gradient of a function
/*!
 * Numerically calculates the gradient of a function at \a theta 
 * using a forward difference formula.
 * \param fun A pointer (of type double) to function. \a fun must take 
 * 3 Matrix objects and return a double.
 * \param theta a Matrix (column vector) of parameter values.
 * \param y a Matrix argument sent to \a fun
 * \param X a Matrix argument sent to \a fun
 * \return Gradient vector at \a theta (type: Matrix object)
*/
Matrix gradfdif(double (*fun)(const Matrix&, const Matrix&, const Matrix&), 
		const Matrix& theta, const Matrix& y, const Matrix& X){
   if (cols(theta)>1){
     cerr << "ERROR 2005: theta not column vector in SCYTHE::gradfdif()" 
          << endl;
     exit(2005);
   }

   int k = size(theta);
   // stepsize CAREFUL-- THIS IS MACHINE-SPECIFIC!!!
   double h = ::sqrt(2.25e-16);

   Matrix grad = Matrix(k,1);

   for (int i=0; i<k; ++i){
     Matrix e = Matrix(k,1);
     e[i] = h;
     Matrix temp = theta + e;
     donothing(temp);
     e = temp - theta;
     grad[i] = (fun(theta+e, y, X)-fun(theta, y, X))/e[i];
   }

   return grad;
}

//  Notes: Algorithm taken from Nocedal and Wright. 1999. section 7.1
//         with some additional tricks from Press et al. NRC
// 4/25/2001 (KQ)
double donothing(const double& x)  { return 0.0; };

//! Numerically calculates the first derivative of a function
/*!
 * Numerically calculates the first derivative off a function with 
 * respect to \a alpha at \a (theta+alpha*p) using a forward difference 
 * formula.  Primarily useful in linesearches.
 * \param fun A pointer (of type double) to function. \a fun must 
 * take 3 Matrix objects and return a double.
 * \param alpha a double, the step length.
 * \param theta a Matrix (column vector) of parameter values.
 * \param PP a Matrix (direction vector).
 * \param y a Matrix argument sent to \a fun
 * \param X a Matrix argument sent to \a fun
 * \return Value of first derivative at point \a theta (type: double).
 */

double gradfdifls(double (*fun)(const Matrix&, const Matrix&, const Matrix&), 
		  const double& alpha, const Matrix& theta, const Matrix& p, 
		  const Matrix& y, const Matrix& X){
   if (cols(theta)>1){
     cerr << "ERROR 2006: Theta not column vector in SCYTHE::gradfdifls()" 
          << endl;
     exit(2006);
   }
   if (cols(theta)>1){
     cerr << "ERROR 2007: Vector p not column vector in SCYTHE::gradfdifls()" 
          << endl;
     exit(2007);
   }

   int k = size(theta);
   // stepsize CAREFUL-- THIS IS MACHINE-SPECIFIC!!!
   double h = ::sqrt(2.25e-16); 

   double deriv = 0.0;

   for (int i=0; i<k; ++i){
     double temp = alpha + h;
     donothing(temp);
     double e = temp - alpha;
     deriv = (fun(theta+(alpha+e)*p, y, X)-fun(theta+alpha*p, y, X))/e;
   }
   return deriv;
}



//  Notes: Algorithm taken from Nocedal and Wright. 1999. section 7.1
//         with some additional tricks from Press et al. NRC
// 4/29/2001 (KQ)

//! Numerically calculates the gradient of a function
/*!
 * Numerically calculates the gradient of a function at \a theta 
 * using a forward difference formula.
 * \param fun A pointer (of type double) to function. \a fun must 
 * take a Matrix object and return a Matrix object.
 * \param theta a Matrix (column vector) of parameter values.
 * \return Value of Jacobian at point \a theta (type: Matrix).
*/
Matrix jacfdif(Matrix (*fun)(const Matrix&), const Matrix& theta){
   if (cols(theta)>1){
     cerr << "ERROR 2008: Theta not column vector in SCYTHE::jacfdif()" 
          << endl;
     exit(2008);
   }

   Matrix fval = fun(theta);

   int k = rows(theta);
   int n = rows(fval);
   // stepsize CAREFUL -- THIS IS MACHINE-SPECIFIC!!!
   double h = ::sqrt(2.25e-16); 
   Matrix J = Matrix(n,k);

   for (int i=0; i<k; ++i) {
     Matrix e = Matrix(k,1);
     e[i] = h;
     Matrix temp = theta + e;
     donothing(temp);
     e = temp - theta;
     Matrix fthetae = fun(theta+e);
     Matrix ftheta = fun(theta);
     for (int j=0; j<n; ++j) {
       J(j,i) = (fthetae[j]-ftheta[j])/e[i];
     }
   }
   
   return J;
}




// 7/12/2001 (KQ)
//! Numerically calculates the Hessian of a function
/*!
 * Numerically calculates the Hessian of a function at \a theta 
 * using a central difference approximation.
 * \param fun A pointer (of type double) to function. \a fun must 
 * take 3 Matrix objects and return a double.
 * \param theta a Matrix (column vector) of parameter values.
 * \param y a Matrix argument sent to \a fun
 * \param X a Matrix argument sent to \a fun
 * \return Value of Hessian at point \a theta (type: Matrix).
*/
Matrix hesscdif(double (*fun)(const Matrix&, const Matrix&, const Matrix&), 
		const Matrix& theta, const Matrix& y, const Matrix& X){
   if (cols(theta)>1){
     cerr << "ERROR 2018: Theta not column vector in SCYTHE::hess()" 
          << endl;
     exit(2018);
   }

   double fval = fun(theta,y,X);

   int k = rows(theta);

   // stepsize CAREFUL -- THIS IS MACHINE-SPECIFIC!!!
   double h2 = 1e-10;
   double h = ::sqrt(h2); 
   Matrix H = Matrix(k,k);

   for (int i=0; i<k; ++i) {
     Matrix ei = Matrix(k,1);
     ei[i] = h;
     Matrix temp = theta + ei;
     donothing(temp);
     ei = temp - theta;
     for (int j=0; j<k; ++j){
       Matrix ej = Matrix(k,1);
       ej[j] = h;
       Matrix temp = theta + ej;
       donothing(temp);
       ej = temp - theta;

       if (i==j){
	 H(i,i) = ( -fun(theta + 2.0*ei,y,X) + 16.0*fun(theta+ei,y,X) - 
		    30.0*fval + 16.0*fun(theta-ei,y,X) - 
		    fun(theta - 2.0*ei,y,X)) / 
	            (12.0 * h2);
       }
       else{
	 H(i,j) = ( fun(theta + ei +ej,y,X) - fun(theta + ei - ej,y,X) - 
		    fun(theta - ei + ej,y,X) + fun(theta - ei - ej,y,X)) /
	   (4.0 * h2);
       }
     }
   }
   
   return H;
}




//  Notes: Algorithm taken from Nocedal and Wright. 1999. Procedure 3.1
// 4/25/2001 (KQ)
//! Performs a linesearch to find the step length (\a alpha)
/*!
 * Performs a linesearch to find the step length (\a alpha) that 
 * approximately minimizes an implied 1-d function
 * \param fun A pointer (of type double) to function. \a fun must 
 * take 3 Matrix objects and return a double.
 * \param theta a Matrix (column vector) of parameter values.
 * \param p a Matrix object (direction vector).
 * \param y a Matrix argument sent to \a fun
 * \param X a Matrix argument sent to \a fun
 * \return Value of Step Length (\a alpha) that approximates 
 * the 1-d function (type: double).
*/
double linesearch1(double (*fun)(const Matrix&, const Matrix&, const Matrix&), 
		   const Matrix& theta, const Matrix& p, const Matrix& y,
		   const Matrix& X){
   if (cols(theta)>1){
     cerr << "ERROR 2009: Theta not column vector in SCYTHE::linesearch1()" 
          << endl;
     exit(2009);
   }
   if (cols(p)>1){
     cerr << "ERROR 2010: Vector p not column vector in SCYTHE::linesearch1()" 
          << endl;
     exit(2010);
   }

   double alpha_bar = 1.0;
   double rho = 0.9;
   double c   = 0.5;
   double alpha = alpha_bar;
   Matrix fgrad = gradfdif(fun, theta, y, X);

   while (fun(theta + alpha*p, y, X)> (fun(theta, y, X) + 
				       c*alpha*t(fgrad)*p)[0]){
     alpha = rho * alpha;
   }

   return alpha;
}




//  Notes: Algorithm taken from Nocedal and Wright. 1999. Algorithm 3.2
// 4/25/2001 (KQ)

//! Performs a linesearch to find the step length (\a alpha)
/*!
 * Performs a linesearch to find the step length (\a alpha) that 
 * approximately minimizes an implied 1-d function.
 * \param fun A pointer (of type double) to function. \a fun must 
 * take 3 Matrix objects and return a double.
 * \param theta a Matrix (column vector) of parameter values.
 * \param p a Matrix object (direction vector).
 * \param y a Matrix argument sent to \a fun
 * \param X a Matrix argument sent to \a fun
 * \return Value of Step Length (\a alpha) that approximates the 
 * 1-d function (type: double).
*/


double linesearch2(double (*fun)(const Matrix&, const Matrix&, const Matrix&), 
		   const Matrix& theta, const Matrix& p, const Matrix& y,
		   const Matrix& X){
   if (cols(theta)>1){
     cerr << "ERROR 2016: theta not column vector in SCYTHE::linesearch2()" 
          << endl;
     exit(2016);
   }
   if (cols(p)>1){
     cerr << "ERROR 2017: p not column vector in SCYTHE::linesearch2()" 
          << endl;
     exit(2017);
   }

   double alpha_last = 0.0;
   double alpha_cur = 1.0;
   double alpha_max = 10.0;
   double c1 = 1e-4;
   double c2 = 0.5;
   int max_iter = 50;
   double fgradalpha0 = gradfdifls(fun, 0, theta, p, y, X);

   for (int i=0; i<max_iter; ++i){
     double phi_cur = fun(theta+alpha_cur*p, y, X);
     double phi_last = fun(theta+alpha_last*p, y, X);
     
     if ( (phi_cur > (fun(theta,y,X) + c1*alpha_cur*fgradalpha0)) ||
         ( (phi_cur >= phi_last) && (i > 0))){
       double alphastar = zoom(fun, alpha_last, alpha_cur, theta, p, y, X);
       return alphastar;
     }

     double fgradalpha_cur = gradfdifls(fun, alpha_cur, theta, p, y, X);
     if ( ::fabs(fgradalpha_cur) <= -1*c2*fgradalpha0)
       return alpha_cur;

     if ( fgradalpha_cur >= 0.0){
       double alphastar = zoom(fun, alpha_cur, alpha_last, theta, p, y, X);
       return alphastar;
     }
     alpha_last = alpha_cur;
     alpha_cur = runif()*(alpha_max - alpha_cur) + alpha_cur;
   }

   return 0.001;
}



//  Notes: Algorithm taken from Nocedal and Wright. 1999. Algorithm 3.3
// 4/25/2001 (KQ)
//! Find the minimum of a function once bracketed
/*!
 * Finds the minimum of a function once bracketed (i.e. over a 
 * closed interval).
 * \param fun A pointer (of type double) to function. \a fun must 
 * take 3 Matrix objects and return a double.
 * \param theta a Matrix (column vector) of parameter values to 
 * anchor the 1-d function.
 * \param alo a double, lower bound of bracket.
 * \param ahi a double, upper bound of bracket.
 * \param p a Matrix object (direction vector) that creates the 
 * 1-d function.
 * \param y a Matrix argument sent to \a fun
 * \param X a Matrix argument sent to \a fun
 * \return Value of Step Length (\a alpha) that approximates 
 * the 1-d function (type: double).
 */
double zoom(double (*fun)(const Matrix&, const Matrix&, const Matrix&), 
	    const double& alo,
	    const double& ahi, const Matrix& theta, const Matrix& p,
	    const Matrix& y, const Matrix& X ){
   if (cols(theta)>1){
     cerr << "ERROR 2011: Theta not column vector in SCYTHE::zoom()" 
          << endl;
     exit(2011);
   }
   if (cols(p)>1){
     cerr << "ERROR 2012: Vector p not column vector in SCYTHE::zoom()" 
          << endl;
     exit(2011);
   }

   double alpha_lo = alo;
   double alpha_hi = ahi;
   double alpha_j = (alo + ahi)/2.0;
   double phi_0 = fun(theta, y, X);
   double c1 = 1e-4;
   double c2 = 0.5;
   double fgrad0 = gradfdifls(fun, 0, theta, p, y, X);

   int count = 0;
   int maxit = 20;
   while(count < maxit){
     double phi_j = fun(theta + alpha_j*p, y, X);
     double phi_lo = fun(theta + alpha_lo*p, y, X);
     
     if ((phi_j > (phi_0 + c1*alpha_j*fgrad0)) ||
         (phi_j >= phi_lo)){
       alpha_hi = alpha_j;
     }
     else{
       double fgradj = gradfdifls(fun, alpha_j, theta, p, y, X);
       if (::fabs(fgradj) <= -1*c2*fgrad0){
         return alpha_j;
       }
       if ( fgradj*(alpha_hi - alpha_lo) >= 0){
         alpha_hi = alpha_lo;
       }
       alpha_lo = alpha_j;
     }
     ++count;
   }
   
   return alpha_j;
}




//  Notes: Algorithm taken from Nocedal and Wright. 1999. algorithm 8.1
//  4/25/2001 (KQ)
//! Find the minimum of a function using the BFGS algorithm
/*!
 * Find the minimum of a function using the BFGS algorithm
 * \param fun A pointer (of type double) to function. \a fun must 
 * take 3 Matrix objects and return a double.
 * \param theta a Matrix (column vector) of parameter values at 
 * which to start the optimization.
 * \param maxit integer value of maximum iterations allowed 
 * (DEFAULT VALUE: 1000).
 * \param tolerance double value of function tolerance for 
 * accuracy purposes (DEFAULT VALUE: 1 X 10^-6).
 * \param y a Matrix argument sent to \a fun
 * \param X a Matrix argument sent to \a fun
 * \return Vector of values that minimize the function (type: Matrix).
*/
Matrix BFGS(double (*fun)(const Matrix&, const Matrix&, const Matrix&), 
	    const Matrix& theta, const Matrix& y, 
	    const Matrix& X, const int& maxit=1000, 
	    const double& tolerance=1e-4){
   if (cols(theta)>1){
     cerr << "ERROR 2013: Theta not column vector in SCYTHE::BFGS()" 
          << endl;
     exit(2013);
   }
   int n = size(theta);

   Matrix H = inv(hesscdif(fun, theta, y, X)); // H is initial inverse hessian
   Matrix fgrad = gradfdif(fun, theta, y, X); // gradient at starting values
   Matrix thetamin = theta;
   Matrix fgrad_new = fgrad;
   Matrix I = eye(n);

   int count = 0;
   while( (t(fgrad_new)*fgrad_new)[0] > tolerance){
     Matrix p = -1*H*fgrad;
     double alpha = linesearch1(fun, thetamin, p, y, X);
     Matrix thetamin_new = thetamin + alpha*p;
     fgrad_new = gradfdif(fun, thetamin_new, y, X);
     Matrix s = thetamin_new - thetamin;
     Matrix y = fgrad_new - fgrad;
     double rho = 1.0/(t(y)*s)[0];
     H = (I - rho*s*t(y))*H*(I - rho*y*t(s)) + rho*s*t(s);
     thetamin = thetamin_new;
     fgrad = fgrad_new;
     ++count;

     cout << "BFGS iteration = " << count << endl;
     t(thetamin).print();
     t(fgrad).print();
     (t(fgrad) * fgrad).print();

     if (count > maxit){
       cerr << "ERROR 2019: SCYTHE::BFGS(double (*fun)(const Matrix&),"<<
	 " const Matrix& theta,const int&, const double&) did not converge"<<
	 " Try better starting values." << endl;
       exit(2019);
     }
   }
   
   return thetamin;
}


//  Notes: Algorithm taken from Nocedal and Wright. 1999. algorithm 11.3
//         line search is not used to determine alpha (this should probably
//          be changed at some point.
// 4/29/2001 (KQ)

//! Zero a function using Broyen's Method
/*!
 * Zero a function using Broyen's Method.  Solves a system of \a n 
 * nonlinear equations in \a n unknowns.
 * \param fun A pointer (of type double) to function. \a fun must 
 * take a Matrix object and return a Matrix object.
 * \param theta a Matrix (column vector) of parameter values at 
 * which to start the optimization.
 * \param maxit integer value of maximum iterations allowed 
 * (DEFAULT VALUE: 5000).
 * \param tolerance double value of function tolerance for accuracy 
 * purposes (DEFAULT VALUE: 1 X 10^-6).
 * \return Vector of values that zero's the function (type: Matrix).
 */

Matrix nls_broyden(Matrix (*fun)(const Matrix&), const Matrix& theta, 
		   const int& maxit=5000, const double& tolerance=1e-6){
   if (cols(theta)>1){
     cerr << "ERROR 2014: Theta not column vector in SCYTHE::nls_broyden()" 
          << endl;
     exit(2014);
   }

   Matrix thetastar = theta;
   Matrix B = jacfdif(fun, thetastar);

    for (int i=0; i<maxit; ++i){
     Matrix fthetastar = fun(thetastar);
     Matrix p = lu_solve(B, -1*fthetastar);
     double alpha = 1.0;
     Matrix thetastar_new = thetastar + alpha*p;
     Matrix fthetastar_new = fun(thetastar_new);
     Matrix s = thetastar_new - thetastar;
     Matrix y = fthetastar_new - fthetastar;
     B = B + ((y - B * s)*t(s))/(t(s)*s);
     thetastar = thetastar_new;
     if (max(fabs(fthetastar_new)) < tolerance)
       return thetastar;
   }

   cerr << "Error: 2015: SCYTHE::nls_broyden did not converge. Try setting "
        << "maxit higher or use a better"
        << endl;
   exit(2015);
}

}  // namespace dec

#endif /* SCYTHE_OPTIMIZE_CC */

