/*
 * lyapsol.c - mex file:  solves the Lyapunov systems arising in forming
 *                        the SD Schur complement
 *
 * synopsis:   B = lyapsol(A,Q,Evsum,blk)
 *
 * inputs:
 *    A           an nxn (block diagonal) sparse symmetric matrix
 *    Q           an nxn (block diagonal) sparse matrix
 *    Evsum       an nxn (block diagonal) sparse matrix
 *    blk         the block diagonal structure
 *
 * output:
 *    B           an nxn (block diagonal) sparse symmetric matrix
 *                obtained a follows:
 *                    (*)  Q * ((Q'*A*Q) ./ Evsum) * Q'
 *
 * Note: this routine can only be called if there are more than on block;
 *       so sdschur must check for the number of blocks and compute (*)
 *       inline if the case of one block, and call this routine otherwise.
 *       It follows that the matrices A, Q, and Evsum must be sparse; A
 *       and Q may have sparse blocks, but the blocks of Evsum must be full.
 *       Indeed, Evsum must have full blocks with strictly positive block
 *       entries. Thus, the matrix B will also have full blocks.
 *       Note that the blocks of Q are obtained by a call to the Matlab
 *       function eig which returns a full matrix of eigenvectors. In fact
 *       eig does not accept a full matrix argument, so even if the blocks
 *       of the matrix whose eigenvalues one wishes to compute are sparse,
 *       the eigenvectors are computed by  eig(full(block)). Hence, the
 *       blocks of Q will be almost always sparse; a notable exception is
 *       on the first iteration when the SD variables were initialized with
 *       multiple of the identity matrix.
 *
 * SDPPACK Version 0.9 BETA
 * Copyright (c) 1997 by
 * F. Alizadeh, J.-P. Haeberly, M. Nayakkankuppam, M.L. Overton, S. Schmieta
 * Last modified : 6/7/97
 */
#include <math.h>
#include "mex.h"

/* Input Arguments */
#define  A_IN       prhs[0]
#define  Q_IN       prhs[1]
#define  Ev_IN      prhs[2]
#define  blk_IN     prhs[3]

/* Output Arguments */
#define  B_OUT  plhs[0]

#if !defined(max)
#define  max(A, B)   ((A) > (B) ? (A) : (B))
#endif

#if !defined(min)
#define  min(A, B)   ((A) < (B) ? (A) : (B))
#endif

static void lyapsol(
double   *Bpr,
double   *Apr,
int      *Air,
int      *Ajc,
double   *Qpr,
int      *Qir,
int      *Qjc,
double   *Evpr,
int      nblk,
double   *blk,
int      maxbsize,
int      eltsize
)
{
   int i,j,k,bsize,bsize2,blkidx,colidx,baseidx,nzblk;
   double *Ablkpr,*Qblkpr,*Bidx,*Aidx,*Qidx,*Evidx,*lhspr;
   mxArray *Ablk,*Qblk,*plhs[1],*prhs[2];
/*
 * Recall: All matrices are block diagonal with multiple blocks.
 * Moreover, they all have full blocks with the possible exception of A.
 */
/*
 * Create temporary full matrices to hold the blocks
 */
   Ablk = mxCreateDoubleMatrix(maxbsize,maxbsize,mxREAL);
   Qblk = mxCreateDoubleMatrix(maxbsize,maxbsize,mxREAL);
   Ablkpr = mxGetPr(Ablk);
   Qblkpr = mxGetPr(Qblk);
/*
 * initialize column index and pointers to the the arrays Bpr, Apr, Qpr, and Evsumpr
 * corresponding to the first nonzero entry of the current block
 */
   colidx = 0;      /* points to first row of the current block */
   Aidx = Apr;      /* base indices into the arrays Bpr, Apr, Qpr, and Evsumpr; */
   Qidx = Qpr;      /* point to the beginning of the current block; note that A is */
   Bidx = Bpr;      /* treated differently than B, Q, and Evsum; that's because we */
   Evidx = Evpr;    /* know in advance how many nonzero entries are in the blocks */
                    /* of these matrices */
/*
 * now loop over the blocks
 */
   for (blkidx = 0; blkidx < nblk; blkidx++) {  /* loop over the blocks */
      bsize = blk[blkidx];                /* size of current block */
      bsize2 = bsize*bsize;               /* # entries in current block, if full */
      mxSetM(Ablk,bsize);                 /* set the dimensions of Ablk, etc. */
      mxSetN(Ablk,bsize);
      mxSetM(Qblk,bsize);
      mxSetN(Qblk,bsize);
/*
 *  copy data: block of A --> Ablk
 *             block of Q --> Qblk
 */
      nzblk = Ajc[colidx+bsize] - Ajc[colidx];    /* # nonzero in block of A */
      if(nzblk == bsize2) {                       /* block of A is full */
         memcpy(Ablkpr,Aidx,bsize2*eltsize);
         Aidx += bsize2;
      }
      else {                                       /* block of A is sparse */
         memset(Ablkpr,'\0',bsize2*eltsize);       /* initialize Ablk to 0 */
         baseidx = 0;   /* index into Ablkpr of top entry of current column */
         for(j = colidx; j < colidx+bsize; j++) {  /* loop over the columns */
            for(k = Ajc[j];k < Ajc[j+1]; k++) {    /* loop over nonzero entries of column */
               i = baseidx + Air[k] - colidx;      /* update index into Ablkpr */
               Ablkpr[i] = *Aidx;    /* write value of nonzero entry into Ablkpr */
               ++Aidx;
            }
            baseidx += bsize;
         }
      }

      nzblk = Qjc[colidx+bsize] - Qjc[colidx];    /* # nonzero in block of Q */
      if(nzblk == bsize2) {                       /* block of Q is full */
         memcpy(Qblkpr,Qidx,bsize2*eltsize);      /* block of Q is full */
         Qidx += bsize2;
      }
      else {                                       /* block of Q is sparse */
         memset(Qblkpr,'\0',bsize2*eltsize);       /* initialize Qblk to 0 */
         baseidx = 0;   /* index into Qblkpr of top entry of current column */
         for(j = colidx; j < colidx+bsize; j++) {  /* loop over the columns */
            for(k = Qjc[j];k < Qjc[j+1]; k++) {    /* loop over nonzero entries of column */
               i = baseidx + Qir[k] - colidx;      /* update index into Qblkpr */
               Qblkpr[i] = *Qidx;    /* write value of nonzero entry into Qblkpr */
               ++Qidx;
            }
            baseidx += bsize;
         }
      }
/*
 * compute Qblk' * Ablk * Qblk
 */
      prhs[0] = Ablk;                     /* Ablk is first input parameter */
      prhs[1] = Qblk;                     /* QAblk is second input parameter */
      mexCallMATLAB(1,plhs,2,prhs,"*");   /* compute Ablk * Qblk */
      prhs[0] = plhs[0];                  /* set ouput as the input to next call to Matlab */
      mexCallMATLAB(1,plhs,1,prhs,"'");   /* compute (Ablk * Qblk)' */
      prhs[0] = plhs[0];                  /* set ouput as first input to next call to Matlab */
      mexCallMATLAB(1,plhs,2,prhs,"*");   /* compute (Ablk * Qblk)' * Qblk */
/*
 * compute (Qblk' * Ablk * Qblk) ./ Evblk
 */
      lhspr = mxGetPr(plhs[0]);
      for(j = 0; j < bsize2; j++)
         lhspr[j] /= Evidx[j];
/*
 * compute Qblk * tmp * Qblk'
 * where tmp = (blk' * Ablk * Qblk) ./ Evblk
 */
      prhs[0] = Qblk;                     /* Qblk is first input parameter */
      prhs[1] = plhs[0];                  /* second input parameter is tmp */
      mexCallMATLAB(1,plhs,2,prhs,"*");   /* compute Qblk * tmp */
      prhs[0] = plhs[0];                  /* set ouput as input to next call to Matlab */
      mexCallMATLAB(1,plhs,1,prhs,"'");   /* compute (Qblk * Ablk)' */
      prhs[1] = plhs[0];                  /* set ouput as second input to next call to Matlab */
      prhs[0] = Qblk;
      mexCallMATLAB(1,plhs,2,prhs,"*");   /* compute Qblk * (Qblk * Ablk)' */
/*
 * now copy output to corresponding block of B
 */
      lhspr = mxGetPr(plhs[0]);
      memcpy(Bidx,lhspr,bsize2*eltsize);
/*
 * and update the pointers to block positions and column index;
 * note that Aidx and Qidx have already been incremented when blocks
 * were copied to Ablk and Qblk
 */
      Bidx += bsize2;
      Evidx += bsize2;
      colidx += bsize;
   }
}

void mexFunction(
   int nlhs,       mxArray *plhs[],
   int nrhs, const mxArray *prhs[]
)
{
   double *Apr,*Qpr,*Evpr,*Bpr,*blk;
   int i,j,n,sumblk2,nblk,maxbsize,eltsize;
   int *Air,*Ajc,*Qir,*Qjc,*Evir,*Evjc,*Bir,*Bjc;

/* Check for proper number of arguments */
   if (nrhs != 4) {
      mexErrMsgTxt("lyapsol requires four input arguments.");
   } else if (nlhs > 1) {
      mexErrMsgTxt("lyapsol requires one output argument.");
   }

   i = mxGetM(A_IN);
   n = mxGetN(A_IN);
   if (i != n)
      mexErrMsgTxt("lyapsol: A must be square.");
   j = mxGetM(Q_IN);
   n = mxGetN(Q_IN);
   if (j != n)
      mexErrMsgTxt("lyapsol: Q must be square.");
   if (i != n)
      mexErrMsgTxt("lyapsol: A and Q must have same dimension.");
   j = mxGetM(Ev_IN);
   n = mxGetN(Ev_IN);
   if (j != n)
      mexErrMsgTxt("lyapsol: Evsum must be square.");
   if (i != n)
      mexErrMsgTxt("lyapsol: A and Evsum must have same dimension.");
   i = mxGetM(blk_IN);
   nblk = mxGetN(blk_IN);
   nblk = max(i,nblk);
   if (nblk <= 1)
      mexErrMsgTxt("lyapsol: multiple blocks required.");
   if (nblk > n)
      mexErrMsgTxt("lyapsol: too many blocks.");

/* consistency check */
   if (!(mxIsSparse(A_IN) && mxIsSparse(Q_IN) && mxIsSparse(Ev_IN)))
      mexErrMsgTxt("lyapsol: block diagonal matrices must be sparse.");

   eltsize = mxGetElementSize(A_IN);

/* Assign pointers to the various input parameters */
   Apr = mxGetPr(A_IN);
   Air = mxGetIr(A_IN);
   Ajc = mxGetJc(A_IN);

   blk = mxGetPr(blk_IN);

   sumblk2 = 0;
   maxbsize = 0;
   for(i = 0; i < nblk; i++) {
      j = blk[i];
      if (j > maxbsize)
         maxbsize = j;
      sumblk2 += j*j;
   }

   Qpr = mxGetPr(Q_IN);
   Qir = mxGetIr(Q_IN);
   Qjc = mxGetJc(Q_IN);

   Evpr = mxGetPr(Ev_IN);
   Evir = mxGetIr(Ev_IN);
   Evjc = mxGetJc(Ev_IN);
   if (Evjc[n] < sumblk2)
      mexErrMsgTxt("lyapsol: all blocks of Evsum must have nonzero entries.");

/* Create a matrix for the return argument (necessarily sparse) */
   B_OUT = mxCreateSparse(n,n,sumblk2,mxREAL);
   Bir = mxGetIr(B_OUT);
   Bjc = mxGetJc(B_OUT);
/*
 * copy the contents of Evjc and Evir array to the Bjc and Bir array;
 * (B and Evsum have exactly the same sparsity structure)
 * the Evjc array has length n+1
 * the Evir array has length sumblk2
 */
   memcpy(Bjc,Evjc,(n+1)*sizeof(int));
   memcpy(Bir,Evir,sumblk2*sizeof(int));

/* Assign pointers to the output parameter */
   Bpr = mxGetPr(B_OUT);

/* Do the actual computations in a subroutine */
   lyapsol(Bpr,Apr,Air,Ajc,Qpr,Qir,Qjc,Evpr,nblk,blk,maxbsize,eltsize);

   return;
}
