#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Capacity of align.names and (plus one) of the logs table.  readseq records
   sequence k as bit k of an int (1 << seqnum), so this must stay below the
   number of value bits in an int. */
#define MAX_SEQ 31
/* Function-like max/min.  Each argument can be evaluated twice, so never
   pass expressions with side effects (e.g. MAX(i++, j)). */
#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Row indices into align.cnts.  readseq derives the row of a character from
   its position in the alpha string ("ATCGN-"), so these values have to stay
   in the same order as the characters of alpha. */
/* Number of rows in align.cnts. */
#define CNTS_LEN 6
#define CNTS_A 0
#define CNTS_T 1
#define CNTS_C 2
#define CNTS_G 3
/* Row for 'N'; readseq also files every unrecognised character here. */
#define CNTS_N 4
/* Row for '-'; the only row that does not set a presence bit in align.algn. */
#define CNTS_GAP 5


/* Not referenced anywhere in the visible part of this file; presumably a
   table of precomputed logarithms indexed by count -- TODO confirm. */
double logs[MAX_SEQ+1];
/* Not referenced in the visible part of this file; presumably a maximum
   entropy value used together with logs -- TODO confirm. */
double maxentr;
/* Accepted symbols.  The index of a character in this string is the row it
   is counted under in align.cnts (see the CNTS_* constants). */
char* alpha = "ATCGN-";

/* Pair of integers.  Not used in the visible part of this file; the field
   names suggest a start/end interval -- TODO confirm against other users. */
typedef struct pair_ints {
  int s;
  int e;
} pair;

/* A multiple alignment as loaded by readMultial/readseq. */
typedef struct align_res {
  /* Heap-allocated name of each sequence: the FASTA header line without the
     leading '>' and without the trailing newline. */
  char* names[MAX_SEQ];
  /* Number of alignment columns (residues per sequence, gaps included). */
  int algnlen;
  /* Number of sequences that were read. */
  int numseq;
  /* One int per column; bit k is set when sequence k holds a non-gap
     character in that column. */
  int* algn;
  /* cnts[row][col] is how many sequences carry the symbol of that row
     (see CNTS_*) in column col. */
  char* cnts[CNTS_LEN];
} align;

/*
 * Count the residues of the first FASTA record in input.
 *
 * The stream must be positioned at a header line starting with '>'.  The
 * whole header line is skipped, then every non-whitespace character is
 * counted up to the next '>' or end of file.  The stream is rewound before
 * returning so the caller can parse the file from the beginning.
 *
 * Returns the residue count, or 0 when the stream is empty (in that case the
 * stream is left untouched, end-of-file indicator included).
 * Terminates the program with status 1 when the first character is not '>'.
 */
int cntlets(FILE* input) {
  int numread = 0;
  int currchar;   /* int, not char: EOF must stay distinguishable from data */

  currchar = fgetc(input);
  if (currchar == EOF)
    return 0;
  if (currchar != '>') {
    fprintf(stderr, "File is not in FASTA format!!\n");
    exit(1);
  }

  /* Skip the rest of the header whatever its length, so an over-long
     description can never be mistaken for sequence data. */
  while ((currchar = fgetc(input)) != '\n' && currchar != EOF)
    ;

  /* Count residues until the next record or the end of the file. */
  while (currchar != EOF && (currchar = fgetc(input)) != '>' && currchar != EOF) {
    if (!isspace(currchar))
      numread++;
  }

  rewind(input);
  return numread;
}

/*
 * Read the next FASTA record from input into slot seqnum of myal.
 *
 * input    - stream positioned at a '>' header line (or at end of file).
 * myal     - alignment being filled; cnts[][] and algn[] must already hold
 *            checksum zero-initialised columns.
 * seqnum   - index of this sequence; selects names[seqnum] and bit seqnum
 *            of every algn[] entry.
 * checksum - number of residues every sequence is required to have.
 *
 * The header (minus '>' and the newline) is stored in a freshly allocated
 * myal->names[seqnum].  Each non-whitespace character is upper-cased and
 * counted in its cnts row; characters outside alpha are reported and counted
 * as 'N'.  Non-gap characters also set bit seqnum in algn[column].
 *
 * Returns 1 when a record was read, 0 when there is no further record.
 * Terminates the program with status 1 on a malformed header, too many
 * sequences, allocation failure, or a residue count different from checksum.
 */
int readseq(FILE* input, align* myal, int seqnum, int checksum) {
  int numread = 0, help;
  char temp[256];
  char *nl;
  int currchar;   /* int, not char: EOF must stay distinguishable from data */

  if (feof(input))
    return 0;
  /* Nothing left to read (e.g. an empty file): report "no more records"
     instead of inspecting an unfilled buffer. */
  if (!fgets(temp, sizeof temp, input))
    return 0;
  if (temp[0] != '>') {
    fprintf(stderr, "File is not in FASTA format!!\n");
    exit(1);
  }
  /* names[] has a fixed number of slots, and with that capacity (MAX_SEQ)
     the per-sequence bit 1 << seqnum also stays inside an int. */
  if (seqnum < 0 || seqnum >= (int)(sizeof myal->names / sizeof myal->names[0])) {
    fprintf(stderr, "Too many sequences in alignment (sequence index %d)!!\n", seqnum);
    exit(1);
  }

  nl = strchr(temp, '\n');
  if (nl) {
    *nl = 0;
  } else {
    /* Header longer than the buffer (or last line without a newline):
       keep the truncated name and discard the rest of the line so it is
       not parsed as sequence data. */
    while ((currchar = fgetc(input)) != '\n' && currchar != EOF)
      ;
  }
  myal->names[seqnum] = malloc(strlen(temp + 1) + 1);
  if (!myal->names[seqnum]) {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }
  strcpy(myal->names[seqnum], temp + 1);

  currchar = fgetc(input);
  while (numread <= checksum && (currchar != '>') && (currchar != EOF)) {
    if (!isspace(currchar)) {
      currchar = toupper(currchar);
      /* strchr would "find" a NUL byte at the terminator of alpha and yield
         an out-of-range row, so treat NUL like any other unknown symbol. */
      if (currchar == '\0' || !strchr(alpha, currchar)) {
        fprintf(stderr, "WARNING %c converted to N\n", currchar);
        currchar = 'N';
      }
      help = (int)(strchr(alpha, currchar) - alpha);
      /* Only columns 0..checksum-1 exist.  A surplus residue is still
         counted so the length mismatch is reported below, but it must not
         be written past the end of the arrays. */
      if (numread < checksum) {
        myal->cnts[help][numread]++;
        if (help != CNTS_GAP) {
          myal->algn[numread] |= (1 << seqnum);
        }
      }
      numread++;
    }
    currchar = fgetc(input);
  }
  /* Leave the '>' of the next record for the next call. */
  if (currchar == '>')
    ungetc(currchar, input);
  if (numread != checksum) {
    fprintf(stderr, "Sequence (%s) of different lengths (%d v. %d)!!\n",
            myal->names[seqnum], numread, checksum);
    exit(1);
  }
  return 1;
}


/*
 * Load a whole multi-FASTA alignment from alfile.
 *
 * The length of the first sequence (as reported by cntlets) fixes the number
 * of columns; readseq is then called until it reports that no record is
 * left.  All per-column tables start out zeroed.
 *
 * Returns a heap-allocated align owned by the caller (names[], algn, each
 * cnts[] row and the struct itself are separate allocations).
 * Terminates the program with status 1 when memory cannot be allocated.
 */
align* readMultial(FILE* alfile) {
  int letcnt = cntlets(alfile), i, j;
  /* calloc(0, ...) may legitimately return NULL; always ask for at least one
     element so that NULL unambiguously means "out of memory". */
  size_t slots = letcnt > 0 ? (size_t) letcnt : 1;
  align* res = calloc (1, sizeof *res);

  if (!res) {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }
  res->algn = calloc (slots, sizeof *res->algn);
  if (!res->algn) {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }
  for (j=0; j<CNTS_LEN; j++) {
    res->cnts[j] = calloc (slots, sizeof *res->cnts[j]);
    if (!res->cnts[j]) {
      fprintf(stderr, "Out of memory\n");
      exit(1);
    }
  }

  /* i counts the records read so far and is the slot of the next one. */
  i = 0;
  while (readseq(alfile, res, i, letcnt))
    i++;

  res->numseq = i;
  res->algnlen = letcnt;
  return res;
}

/*
 * Advance the column cursor *i up to (but not including) column trgt.
 *
 * For every column passed, *a is incremented when bit 0 of algn[] is set
 * and *b when bit 1 is set, i.e. when the first / second sequence holds a
 * non-gap character there.  Nothing happens when *i is already >= trgt.
 */
void skipto (align *myal, int trgt, int *i, int *a, int *b){
  for (; *i < trgt; ++*i){
    const int present = myal->algn[*i];

    if (present & 1)
      *a += 1;
    if (present & 2)
      *b += 1;
  }
}

void analyze (align *myal, int cutoff, int window){
  int i, j, k, l, c;
  int runstart = -1;
  int *s, *e, size = 1, len = 0;
  int s1, s2, a1, a2, b1, b2;
  
  s = (int *) malloc (size * sizeof (int));
  e = (int *) malloc (size * sizeof (int));  
  
  for (i = 0, c = 0; i < window; i++){
    for (j = 0, k = 0; k == 0 && j < strlen (alpha); j++) k = (myal->cnts[j][i] == 2);
    if (k) c++;
  }

  if (c * 100 >= window * cutoff) runstart = 0;
  for (i = 1; i < myal->algnlen - window + 1; i++){
    for (j = 0, k = l = 0; j < strlen (alpha); j++){
      k = k || (myal->cnts[j][i + window - 1] == 2);
      l = l || (myal->cnts[j][i - 1] == 2);
    }
    if (k) c++;
    if (l) c--;

    if (c * 100 >= window * cutoff){
      if (runstart < 0){
	if (len > 0 && e[len - 1] >= i)
	  runstart = s[--len];
	else 
	  runstart = i;
      }      
    }
    else {
      if (runstart >= 0){
	s[len] = runstart;
	e[len] = i + window - 1;
	len++;

	if (len == size){
	  size *= 2;
	  s = (int *) realloc (s, sizeof (int) * size);
	  e = (int *) realloc (e, sizeof (int) * size);
	}
	runstart = -1;
      }
    }
  }

  if (runstart >= 0){
    s[len] = runstart;
    e[len] = myal->algnlen - 1;
    len++;
  }

  c = s1 = a1 = b1 = s2 = a2 = b2 = 0;
  for (i = 0; i < len; i++){
    //    skipto (myal, s[i], &s1, &a1, &b1);
    //    skipto (myal, e[i], &s2, &a2, &b2);
    c += e[i] - s[i];
  }

  s1 = a1 = b1 = s2 = a2 = b2 = 0;
  for (i = 0; i < len; i++){
    skipto (myal, s[i], &s1, &a1, &b1);
    skipto (myal, e[i], &s2, &a2, &b2);
    printf ("(%d %d) --> (%d %d), (%d %d)\n", s[i], e[i], a1, a2, b1, b2);
  }

  free (s); free (e);
  
}

/* Parse text as a base-10 int.  Returns 1 and stores the value in *out on
   success; returns 0 when text is empty, has trailing characters or does not
   fit in an int. */
static int parse_int_arg(const char *text, int *out) {
  char *end;
  long value;

  errno = 0;
  value = strtol(text, &end, 10);
  if (end == text || *end != '\0' || errno == ERANGE ||
      value < INT_MIN || value > INT_MAX)
    return 0;
  *out = (int) value;
  return 1;
}

/* Free every allocation hanging off an alignment, then the alignment. */
static void release_alignment(align* al) {
  int k;

  if (!al)
    return;
  for (k = 0; k < al->numseq; k++)
    free(al->names[k]);
  for (k = 0; k < (int)(sizeof al->cnts / sizeof al->cnts[0]); k++)
    free(al->cnts[k]);
  free(al->algn);
  free(al);
}

/*
 * Entry point: cstat multi_fasta_file cutoff window_size
 *
 * Loads the alignment from the given file and prints the regions found by
 * analyze for the given percentage cutoff and window size.
 *
 * Exit status: 0 on success, 1 on a usage error (wrong argument count,
 * non-numeric cutoff/window, or window_size < 1), 2 when the file cannot be
 * opened.
 */
int main(int argc, char** argv) {
  FILE *alignfile;
  align* myal;
  int cutoff, window;

  /* atoi would silently turn garbage into 0; reject it up front instead. */
  if (argc != 4 ||
      !parse_int_arg(argv[2], &cutoff) ||
      !parse_int_arg(argv[3], &window) ||
      window < 1) {
    printf("usage:\ncstat multi_fasta_file cutoff window_size\n");
    exit(1);
  }
  if (!(alignfile = fopen(argv[1],"r"))) {
    printf("couldnt open alignment file %s\n",argv[1]);
    return 2;
  }

  myal = readMultial(alignfile);
  fclose(alignfile);   /* the alignment is fully in memory from here on */

  analyze (myal, cutoff, window);

  release_alignment(myal);
  return 0;
}
