function [z, f, u, v, opt, mu, gnorm, Jcond, localmax, gnormrec, ...
  optmeasure_smoothcase, optmeasure_nonsmoothcase] = newtonsaddle(method, A, z0, options)
% Newton's method in 2 or 3 real variables to find saddle point of the function
% f(Re z, Im z) = sigma_min(A - zI).  This is done by applying Newton's method 
% to solve an equation g = 0. There are 3 different possible choices for g:
% Method 1 (covers generic and nongeneric saddle points): 
%    g(z, mu) = 
%     [mu * grad sigma_N-1  +  (1 - mu) * grad sigma_N]   (two equations)
%     [mu * (sigma_N-1 - sigma_N) = 0]   (third equation: complementarity)
% where the singular values are evaluated at A - zI.
% This is a system of 3 equations in 3 real variables.
% If mu is zero, this simply says the gradient of the smallest singular
% value is zero.  This is the generic case.  In nongeneric cases, mu may 
% not be zero, in which case this forces the two smallest singular values 
% to be equal and their gradients a negative multiple of each other.
% Least squares approximation is used to update mu before each Newton step.
% Method 2 (covers only the generic case):
%    g(z) = grad sigma_N (A - zI) = 0  (two equations)
% Method 3 (covers only the nongeneric case):
%    g(z, mu) = 
%     [mu * grad sigma_N-1  +  (1 - mu) * grad sigma_N]   (two equations)
%     [sigma_N-1 - sigma_N]   (third equation: equate singular values)
% Note that the difference between Methods 1 and 3 is that mu does not
% appear in the 3rd equation of Method 3.
%
% The original idea was that Method 1 combines, and therefore eliminates
% the need for, Methods 2 and 3.  However, Method 1 occasionally fails,
% e.g. on the "foo" example, because it converges to a point where the
% function g is not zero but the Jacobian is very ill conditioned, and
% hence the line search fails.  In this case, Method 2 may work fine.
% So we use Methods 2 and 3 as a backup for when Method 1 fails.
% Method 2 clearly has quadratic convergence in the generic case.
% Methods 1 and 3 are usually quadratically convergent too, but the
% reason is not transparent, given the nonsmoothness of the smallest
% singular value in the generic case: see the discussion in the paper for
% more information about this.
%
% Input:
%   method   1, 2 or 3: which function g to use (see above)
%   A        the matrix (indexed here as n by n, with n = length(A))
%   z0       complex starting point
%   options  struct; the fields read here are
%              stoptol   stop when norm(g) drops below this
%              maxit     max number of Newton iterations
%              minstep   quit line search if steplength drops to this value
%              prtlevel  print/plot level (> 1, > 2 print; > 3, > 4 plot)
%            options is also passed on to getmugj
% Output:
%   z        final iterate (complex)
%   f        smallest singular value of A - zI at the last accepted iterate
%   u, v     vectors for whichever of the smooth/nonsmooth cases gives the
%            smaller optimality measure (nan if the iteration failed)
%   opt      the smaller of the two optimality measures; nan if the
%            iteration failed or the point was rejected because the
%            Hessian block is positive definite
%   mu       multiplier estimate; reset to 0 if the smooth case is chosen
%   gnorm    final norm(g)
%   Jcond    cond of the final Jacobian
%   localmax 1 if the Hessian block is negative definite (smooth case)
%   gnormrec record of norm(g) at the start of each iteration
%   optmeasure_smoothcase, optmeasure_nonsmoothcase
%            the two optimality measures (nan if the iteration failed)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%  NEARDEFMAT 1.0 Copyright (C) 2010  Michael Overton
%%  This program is free software: you can redistribute it and/or modify
%%  it under the terms of the GNU General Public License as published by
%%  the Free Software Foundation, either version 3 of the License, or
%%  (at your option) any later version.
%%
%%  This program is distributed in the hope that it will be useful,
%%  but WITHOUT ANY WARRANTY; without even the implied warranty of
%%  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%%  GNU General Public License for more details.
%%
%%  You should have received a copy of the GNU General Public License
%%  along with this program.  If not, see <http://www.gnu.org/licenses/>.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% the last 3 output args are not used by neardefmat.m, but are used by 
% runmethods.m which was used to generate results for the paper.

% first assign default return values in case iteration fails
u = nan; v = nan; opt = nan; mu = nan; gnorm = inf; Jcond = nan; localmax = 0;
% these must also have defaults: otherwise the early returns below leave
% them unassigned and a caller requesting all outputs gets an error
gnormrec = [];
optmeasure_smoothcase = nan; optmeasure_nonsmoothcase = nan;
stoptol = options.stoptol; % stopping tolerance
maxit = options.maxit;   % max number of Newton iterations
alphamin = options.minstep; % quit if steplength drops to this value
prtlevel = options.prtlevel;
n = length(A);
I = eye(n);
z = z0;
x = [real(z); imag(z)];
[U, S, V] = svd(A - z*I);
f = S(n,n);
% get the function g and its Jacobian J, as well as the least squares
% estimate of the multiplier mu (0 if method == 2)
% gnormprev = nan;   2009: eliminating gnormprev, prevents stopping with
% the initial guess which may be excellent, and may also cause difficulty
% with trying to compute double singular value too accurately in which case
% can't resolve the singular vectors
[mu, g, J] = getmugj(method, U, S, V, options);
gnorm = norm(g);
Jcond = cond(J);
alpha = 0; % for first line of output
iter = 0;
% the loop is left only via break (converged, or line search failed) or
% via return (failure: max iters exceeded or bad Newton direction)
while true
    iter = iter + 1;
    gnormrec(iter) = gnorm;  % this is the norm at the starting pt for iter=1
    if prtlevel > 4
        if method == 1
            plot(x(1),x(2),'r+')
        elseif method == 2
            plot(x(1),x(2),'m+')
        else
            plot(x(1),x(2),'g+')
        end
        hold on
    end
    if prtlevel > 2
        fprintf('newtonsaddle: alpha = %g   norm(g) = %g   cond(J) = %g\n', alpha, gnorm, Jcond)
    end
    % 2009: eliminating gnormprev
    % logic for checking gnormprev, not gnorm, is that once stoptol is
    % satisfied we would still like to take one more step, to try to get to
    % machine precision, but not more, since rounding may prevent us from
    % getting close to machine precision
    % if gnormprev < stoptol | gnorm < stoptol & iter > maxit  
    if gnorm < stoptol   
        if prtlevel > 2
            fprintf('newtonsaddle: stop tolerance satisfied in newtonsaddle (method %d)\n', method)
        end
        break % break out of while loop instead of doing one more iteration
    elseif iter > maxit
        if prtlevel > 2
            fprintf('newtonsaddle: max iters exceeded, quit newtonsaddle (method %d)\n', method)
        end
        % NOTE(review): on this return z is still z0, while f, mu, gnorm and
        % Jcond reflect the last accepted iterate; u, v, opt stay nan
        return
    else
        newton = -J\g;    
        % in case of methods 1 and 3, newton has a 3rd component but it is
        % ignored and replaced by least squares estimate for mu
        % as long as J is nonsingular, newton direction is guaranteed to be
        % a descent direction for g'*g, although can have numerical trouble 
        % if J is ill conditioned
        if any(~isfinite(newton))
            if prtlevel > 2
                fprintf('newtonsaddle: newton direction contains nan or inf, quit newtonsaddle (method %d)\n', method)
            end
            return
        end
        alpha = 1;
        decrease = 0;     % line search to reduce norm(g)
        while ~decrease && alpha > alphamin
            xnew = x + alpha*newton(1:2);  % 1st two variables only
            % use the literal 1i so the imaginary unit cannot be shadowed
            [Unew, Snew, Vnew] = svd(A - (xnew(1) + 1i*xnew(2))*I);
            [munew,gnew,Jnew] = getmugj(method, Unew, Snew, Vnew, options);
            gnormnew = norm(gnew);
            if gnormnew < gnorm 
                decrease = 1;
            else 
                alpha = alpha/2;  % reject and contract if norm(g) not reduced
            end
        end % of line search
        if ~decrease
%             if gnorm < stoptol % so far only checked gnormprev....now eliminated
%                 if prtlevel > 2
%                     fprintf('newtonsaddle: stop tolerance satisfied in newtonsaddle (method %d)\n', method)
%                 end
%                 done = 1;
%             else
            if prtlevel > 2
                 fprintf('newtonsaddle: line search failed, quit newtonsaddle (method %d)\n', method)
            end
            break
            %%%% return; DO NOT RETURN SINCE THIS MAY INDICATE SUCCESS WITH ROUNDING LIMITS REACHED
        else
            % gnormprev = gnorm;
            % accept the step: all quantities now refer to the new point
            x = xnew; f = Snew(n,n); U = Unew; S = Snew; V = Vnew; 
            mu = munew; g = gnew; J = Jnew;
            gnorm = norm(g); Jcond = cond(J);
        end
    end % of main work in while loop
end % of while loop
z = x(1) + 1i*x(2);
if prtlevel > 1
    fprintf('final mu is %g\n', mu)
end
% if method 1 worked, either mu or the difference of the two smallest
% singular value must be close to 0
% Ralph pointed out that it's hard to tell if mu is zero or not, so
% instead we do both residual computations (smooth and nonsmooth) and
% return the better one
%  no longer using:  if method == 2 | method == 1 & mu < S(n-1,n-1) - f % smooth case
% FIRST COMPUTE U AND V AS IF MU IS 0
u_smoothcase = U(:,n);
v_smoothcase = V(:,n);
optmeasure_smoothcase = optmeasure(f, u_smoothcase, v_smoothcase, A, z);
% SECOND: COMPUTE u AND v AS IF WE HAVE THE NONSMOOTH CASE (tangential coalescence)
    % Pick u and v from the invariant subspace such that u'*v = 0.
    % This is possible because a convex combination of the singular
    % vectors is zero by construction.  Even in the smooth case,
    % where mu is zero, the following usually works, setting u and v
    % to the left and right singular vectors for the smallest 
    % singular value, but not always, which is why we compute it the other
    % way as well
[u_nonsmoothcase,v_nonsmoothcase]=numrange(U(:,n-1),U(:,n),V(:,n-1),V(:,n),S(n-1,n-1)-S(n,n),mu,prtlevel);
%%%%%%%%DO NOT NEED TO ADD THE SINGULAR DIFFERENCE, BECAUSE THE NONSMOOTH
%%%%%%%%ERROR INCLUDES THE RESIDUALS FOR THE SINGULAR VECTOR EQUATION USING
%%%%%%%%THE SMALLEST SINGULAR VALUE
optmeasure_nonsmoothcase = optmeasure(f, u_nonsmoothcase, v_nonsmoothcase, A, z); %%% + S(n-1,n-1)-f;
if optmeasure_smoothcase <= optmeasure_nonsmoothcase
    u = u_smoothcase;
    v = v_smoothcase;
    opt = optmeasure_smoothcase;
    mu = 0;  % set mu to 0 since smooth defective measure is better
    if prtlevel > 1
        fprintf(' smooth defectiveness measure %g is better than nonsmooth defectiveness measure %g\n',...
          optmeasure_smoothcase, optmeasure_nonsmoothcase);
        fprintf(' therefore, setting mu to 0\n') 
    end
    % when method == 2, J is the Hessian of the smooth function 
    % sigma_min in 2 variables, and when method == 1, it is the leading
    % 2 by 2 block of J
    eigH = eig(J(1:2,1:2)); % works in both cases
    if max(eigH) < -stoptol
        if prtlevel > 2
            fprintf('newtonsaddle: Hessian is negative definite: this is apparently not a saddle point, but keep just in case\n')
        end
        localmax = 1;  % don't discard since might actually be a saddle because of rounding errors, but warn user
    elseif min(eigH) > stoptol
        if prtlevel > 2
            fprintf('newtonsaddle: Hessian positive definite: this is apparently not a saddle point, so reject it\n')
        end
        % this happens for A=gallery('kahan',15,asin(0.1^(1/14)),0),
        % the problem being that the line search fails (decrease = 0) and
        % the final z is quite close to an eigenvalue
        opt = nan; % reject this saddle point
    end
else
    u = u_nonsmoothcase;
    v = v_nonsmoothcase;
    opt = optmeasure_nonsmoothcase;
    if prtlevel > 1
         fprintf(' nonsmooth defectiveness measure %g is better than smooth defectiveness measure %g\n',...
          optmeasure_nonsmoothcase, optmeasure_smoothcase);
    end
    % There is no second order condition to check in the nongeneric case.
    % If the first-order condition is satisfied, the point must be a
    % nonsmooth saddle point, not a local min or max.  Originally looked at
    % curvature of J(1:2,1:2) projected along line tangent to the two coalescing
    % pseudospectral components, arguing by analogy with the usual second
    % order conditions for constrained optimization, but these are not
    % valid in this setting as the singular values are not smooth at the
    % point of coalescence and there are no constraint qualifications.
end 
if prtlevel > 2
    fprintf('newtonsaddle: u''*v = %g\n', u'*v);
end
if prtlevel > 3
    if method == 1
        plot(x(1),x(2),'ro')
    elseif method == 2
        plot(x(1),x(2),'mo')
    else
        plot(x(1),x(2),'go')
    end
    hold on
end