/* functions to handle MSI files
 */
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "msi.h"
#include "msi_io.h"

/* number of connection slots kept per atom (second index of the con, mul
   and bo tables) */
#define MAXCON 4

/* number of atoms the per-molecule work arrays allocated in read_car()
   can hold */
#define DIM 9999

/* result of mdf2con() */
enum mdf_status {
  MDF_OK,                                 /*  all requested atoms were read   */
  MDF_EOF,                                /*  no atom line was found          */
  MDF_ERR                                 /*  incomplete or inconsistent data */
};

/* forward declarations of the routines defined further down in this file */
void read_car(FILE *cfp, FILE *mfp, int *numat, char *molname,
              char **atsy, char **resnum, int **con, int **mul,
              float **xyz);
void car2xyz(FILE *cfp, int *numat, struct pdb_info *pdb_vec,
             char **atname, char **atsy, float **xyz, float *q1);
int  mdf2con(FILE *mfp, int nat, char *mname, char **atnm, int **con,
             int **mul, char **bo, int *formq);
char pdb2mmr(char *pdbstr);

int check_car(FILE *fp, int *pna0, int *pnat, char *msi_title)
/* Checks if a file is really a 'car' file, and returns the number of atoms
   of the first molecule and of all molecules on file.

   fp         stream to inspect; it is rewound before anything is read
   pna0       if not NULL, receives the line count reached at the first
              "end" line seen with a non-zero count (0 if there is none)
   pnat       if not NULL, receives the number of lines after the four
              header lines that do not start with "end"
   msi_title  if not NULL, receives the third header line as read by
              fgets() (newline included); needs room for 256 characters

   Returns 0, with the stream rewound, when the first line cannot be read
   or does not start with CAR_W1.  Returns 1 otherwise, with the stream
   positioned just after the fourth header line.
   Prints a message and calls exit(1) when no atom line is found.
 */
{
  char title[256];
  char *dest = msi_title ? msi_title : title;  /* tolerate a NULL title    */
  int tcount = 0, fcount = 0;
  int i;

  rewind(fp);

  /* line 1 : an unreadable first line (empty file) is not a 'car' file;
     strncmp() returns an int, so it is compared with 0 */
  if(fgets(title, sizeof title, fp) == NULL ||
     strncmp(title, CAR_W1, strlen(CAR_W1)) != 0) {
    rewind(fp);
    return( 0 );
  }

  /* lines 2 to 4 : only the third one is kept; if any of them is missing
     there cannot be atom lines either, and the file is reported empty */
  dest[0] = '\0';
  if(fgets(title, sizeof title, fp) != NULL &&
     fgets(dest, 256, fp) != NULL &&
     fgets(title, sizeof title, fp) != NULL) {

    /* every remaining line that is not an "end" marker counts as an atom */
    while(fgets(title, sizeof title, fp) != NULL) {
      if(!strncmp(title, "end", 3)) {
        /* store the number of atoms of the first molecule */
        if(!fcount) fcount = tcount;
        continue;
      }
      tcount++;
    }
  }

  if(!tcount) {
    printf("Empty 'car' file !\n");
    exit ( 1 );
  }

  /* reset the correct position to process all data on file :
     skip the four header lines again */
  rewind(fp);
  for(i = 0; i < 4; i++) {
    if(fgets(title, sizeof title, fp) == NULL) break;
  }

  /* positive outcome */
  if(pna0) *pna0 = fcount;
  if(pnat) *pnat = tcount;
  return ( 1 );
}
void read_msi(FILE *cfp, FILE *mfp, int *numat,
              char **atsy, char **resnum, int **con, int **mul,
              float **xyz)
/* Reads every molecule of a car/mdf file pair, one read_car() call per
   molecule, and stores them one after the other in the caller's arrays.

   read_car() is handed the arrays offset by the number of atoms already
   stored.  After each molecule its non-zero connection entries are shifted
   by that same offset, so atom numbers are sequential over the whole set.
   Reading stops at end of file on cfp or when read_car() reports no atoms.
   On return *numat holds the total number of atoms read.
 */
{
  char molname[80];
  int total = 0;                          /*  atoms stored so far             */
  int count;                              /*  atoms in the current molecule   */
  int row, col;

  while(!feof(cfp)) {
    /* append the next molecule behind the ones already stored */
    read_car(cfp, mfp, &count, molname, atsy + total, resnum + total,
             con + total, mul + total, xyz + total);
    if(count == 0) break;

    /* shift the connection entries of this molecule; empty slots (0)
       are left alone */
    for(row = total; row < total + count; row++) {
      int *bonds = con[row];

      for(col = 0; col < MAXCON; col++) {
        if(bonds[col] != 0) bonds[col] += total;
      }
    }

    total += count;
  }

  /* total number of atoms */
  *numat = total;

}

void read_car(FILE *cfp, FILE *mfp, int *numat, char *molname,
              char **atsy, char **resnum, int **con, int **mul,
              float **xyz)
/* Reads one molecule (atoms and connection table) from a car/mdf file pair.

   cfp      'car' stream; atom lines are read by car2xyz() up to the next
            "end" line or the end of file
   mfp      'mdf' stream; read by mdf2con() when at least one atom was found
   numat    receives the number of atoms read (0 if none)
   molname  receives the molecule name found by mdf2con()
   atsy     atsy[i] receives the atomic symbol string of atom i
   resnum   resnum[i] receives the residue number string of atom i
   con,mul  connection and bond multiplicity tables filled by mdf2con()
   xyz      xyz[i][0..2] receive the coordinates of atom i

   The output arrays belong to the caller and are not size-checked here.
   Work arrays of DIM entries are allocated and released on every path.
   Prints a message and calls exit(1) when memory cannot be allocated or
   when mdf2con() returns MDF_ERR.
 */
{
  struct pdb_info *pdb_vec;
  char **atname, **bo;
  float *q1;
  int *formq;
  int nat = 0;
  int status;
  register int i;

  /* atname and bo are zero-filled so that unused slots are NULL */
  pdb_vec = (struct pdb_info *) malloc(DIM * sizeof(struct pdb_info));
  atname = (char **) calloc(DIM, sizeof(char *));
  bo = (char **) calloc(DIM, sizeof(char *));
  q1 = (float *) malloc(DIM * sizeof(float));
  formq = (int *) malloc(DIM * sizeof(int));
  if(!pdb_vec || !atname || !bo || !q1 || !formq) {
    printf("out of memory reading a 'car' file !\n");
    exit(1);
  }

  car2xyz(cfp, &nat, pdb_vec, atname, atsy, xyz, q1);
                                          /*  the number of atoms is now
                                              defined; car2xyz() never lets
                                              it grow beyond DIM              */

  for(i=0; i<nat; ++i) strcpy(resnum[i], pdb_vec[i].res_numb);

  *numat = nat;                           /*  number of atoms                 */

  if(nat) {                               /*  nothing more to do without atoms */
    for(i=0; i<nat; i++) {
      bo[i] = (char *) malloc((size_t) MAXCON * sizeof(char));
      if(!bo[i]) {
        printf("out of memory reading a 'car' file !\n");
        exit(1);
      }
    }

                                          /*  get the connection table        */
    status = mdf2con(mfp, nat, molname, atname, con, mul, bo, formq);

    switch(status) {
      case MDF_ERR :
        printf("error reading an 'mdf' file !\n");
        exit(1);
      case MDF_OK :
        break;
      case MDF_EOF :
        /* we reached the end of file : assume that this is not
           the first structure and that the connection table
           and bond orders have been defined already */
        break;
      default :
        break;
    }
  }

  /* release the work arrays; this is also reached when no atom was read,
     so that nothing is leaked on the last call for a file */
  for(i=0; i<nat; i++) {
    free(atname[i]);
    free(bo[i]);
  }

  free(pdb_vec);
  free(atname);
  free(bo);
  free(q1);
  free(formq);

}

void car2xyz(FILE *cfp, int *numat, struct pdb_info *pdb_vec, char **atname,
             char **atsy, float **xyz, float *q1)
/* Reads atom lines from a 'car' stream until a line starting with "end" or
   the end of file, appending atoms from index *numat on.

   For each atom line the fields are, in order : atom name, x, y, z, residue
   name, residue number, one skipped field, atomic symbol, charge.
   For atom n this routine
     - stores the coordinates in xyz[n][0..2] and the charge in q1[n]
     - allocates atname[n] and fills it with "<resname>_<resnumber>:<atom>"
       (the caller frees it)
     - copies residue name and number to pdb_vec[n] and sets
       pdb_vec[n].res_snme from pdb2mmr()
     - copies the atomic symbol to atsy[n]
   and *numat is updated after every atom.

   Text fields are truncated to 19 characters; fields missing from a short
   line are left as empty strings (charge 0, coordinates untouched).
   Prints a message and calls exit(1) if more than DIM atoms are found or
   if memory cannot be allocated.

   NOTE(review): the DIM limit matches the work arrays that read_car()
   passes in; confirm no other caller relies on larger arrays.
   NOTE(review): the sizes of pdb_vec[].res_name / res_numb and of atsy[n]
   are not visible here; they are assumed to hold 20 characters.
 */
{
  char atmstr[20], molstr[20], resstr[20];
  char fline[256], sline[20];
  int nat;
  size_t strdim;

  nat = *numat;

  while(fgets(fline, sizeof fline, cfp) != NULL) {

    if(!strncmp(fline, "end", 3)) return; /*  last line for a molecule        */

    if(nat >= DIM) {                      /*  no room left in the work arrays */
      printf("too many atoms in a 'car' file (limit is %d) !\n", DIM);
      exit(1);
    }

    /* defaults, so that a short line never leaves garbage behind */
    atmstr[0] = molstr[0] = resstr[0] = sline[0] = '\0';
    q1[nat] = 0.0f;

                                          /*  get coordinates and id; widths
                                              keep each text field inside its
                                              20 character buffer             */
    sscanf(fline, "%19s %f %f %f %19s %19s %*s %19s %f",
           atmstr, &xyz[nat][0], &xyz[nat][1], &xyz[nat][2],
           molstr, resstr, sline, &q1[nat]);

    /* three text fields + '_' + ':' + terminating '\0' */
    strdim = strlen(atmstr) + strlen(molstr) + strlen(resstr) + 3;

    atname[nat] = (char *) malloc(strdim * sizeof(char));
    if(!atname[nat]) {
      printf("out of memory reading a 'car' file !\n");
      exit(1);
    }
    strcpy(atname[nat], molstr);          /*  get the name of this atom      */
    strcat(atname[nat], "_");
    strcat(atname[nat], resstr);
    strcat(atname[nat], ":");
    strcat(atname[nat], atmstr);
    strcpy(pdb_vec[nat].res_name, molstr);
    strcpy(pdb_vec[nat].res_numb, resstr);
    pdb_vec[nat].res_snme = pdb2mmr(molstr);

    strcpy(atsy[nat], sline);             /*  atomic symbol                  */

    *numat = ++nat;                       /*  one more atom added so far     */

  } /* while() */

} /* car2xyz() */

int mdf2con(FILE *mfp, int nat, char mname[], char **atnm, int **con,
            int **mul, char **bo, int formq[])
/* Reads the connection table of one molecule from an 'mdf' stream.
   (Original note: modified to use contiguous memory for the arrays.)

   mfp    'mdf' stream; lines are skipped until one starts with "@molecule"
   nat    number of atom lines to process
   mname  receives the name found on the "@molecule" line, truncated to 79
          characters (read_msi() supplies an 80 character buffer)
   atnm   the nat atom names the connections are matched against
   con    con[i][j] receives the 1-based index in atnm of the j-th neighbour
          of atom i, or 0 for an unused slot
   mul    mul[i][j] receives atoi() of the text following '/' in the
          connection, or 1 when there is no '/'
   bo     bo[i][j] receives PDBCHR, DBNCHR or TBNCHR for the bond order
          texts "1.5", "2.0" and "3.0", SBNCHR for any other connection and
          '\0' for an unused slot
   formq  formq[i] receives the formal charge of atom i

   Returns MDF_OK once nat atom lines have been processed, MDF_EOF when the
   stream ended before any atom line was processed, and MDF_ERR when a
   neighbour cannot be found in atnm, an atom name has no ':' or the stream
   ended in the middle of the molecule.
   At most four connections per atom are read.
 */
{
  char ct, *cp0, fline[256];
  char atmstr[64], fqstr[16];             /*  tokens of at most 63 / 15 chars */
  char cs[4][128];                        /*  holds prefix + neighbour name   */
  char mystr[64], full[128];
  int ncn, have_mol = 0, status;
  register int i, j, k;

  status = MDF_EOF;

  i = 0;
  while(fgets(fline, sizeof fline, mfp) != NULL) {
    if(!strncmp(fline, "@molecule", 9)) {
      sscanf(fline, "%*s %79s", mname);   /*  get the name of this molecule   */
      have_mol++;                         /*  prepare to read data            */
      continue;
    }

    if(!have_mol) continue;               /*  keep searching                  */

    if(fline[0] == '\n' ||                /*  ignore empty and comment lines  */
       fline[0] == '\0' ||
       fline[0] == ' '  ||
       fline[0] == '#'  ||
       fline[0] == '!'  ||
       fline[0] == '@') continue;

    for(j=0; j<MAXCON; j++) {             /*  fill the blanks with zeroes     */
      con[i][j] = 0;
      mul[i][j] = 0;
      bo[i][j] = '\0';
    }
                                          /*  the formal charges must be read */
                                          /*  as strings (pos. 6); a missing  */
                                          /*  charge reads as an empty string */
    fqstr[0] = fqstr[1] = '\0';
    ncn = sscanf(fline,
                 "%63s %*s %*s %*s %*d %15s %*f %*d %*d %*d %*f %*f "
                 "%63s %63s %63s %63s",
                 atmstr, fqstr, cs[0], cs[1], cs[2], cs[3]);
    if(ncn < 1) continue;                 /*  only white space on this line   */
    ncn -= 2;                             /*  count at. name and charge out   */
    if(ncn < 0) ncn = 0;

    ct = fqstr[1];                        /*  put charge first                */
    if(ct == '-' || ct == '+') {
      fqstr[1] = fqstr[0];
      fqstr[0] = ct;
    }
    formq[i] = (int) atoi(fqstr);
                                          /*  connection table as names       */
    cp0 = strchr(atmstr, ':');
    if(cp0 == NULL) return(MDF_ERR);      /*  not a "<residue>:<atom>" name   */
    *(cp0+1) = '\0';                      /*  keep the prefix up to the ':'   */
    for(j=0; j<ncn; j++) {
      cp0 = strchr(cs[j], '/');
      if(cp0 != NULL) {
        *cp0 = '\0';
        mystr[0] = '\0';
        sscanf((cp0 + 1), "%63s", mystr); /*  get the bond order data         */
        if(!strcmp(mystr, "1.5")) bo[i][j] = PDBCHR;
        else
        if(!strcmp(mystr, "2.0")) bo[i][j] = DBNCHR;
        else
        if(!strcmp(mystr, "3.0")) bo[i][j] = TBNCHR;
        else bo[i][j] = SBNCHR;
        mul[i][j] = (int) atoi(cp0 + 1);
      }
      else {
        mul[i][j] = 1;                    /*  implicit single bond            */
        bo[i][j] = SBNCHR;
      }
      if(strchr(cs[j], ':') == NULL) {
        /* prefix and name are at most 63 characters each, so the result
           fits both in full[] and in cs[j] */
        strcpy(full, atmstr);
        strcat(full, cs[j]);              /*  build a full atom name          */
        strcpy(cs[j], full);
      }
    }
    if((float) i < nat/2.) {              /*  search upward                   */
      for(j=0; j<ncn; j++) {
        for(k=0; k<nat; k++) {
          if(!strcmp(cs[j], atnm[k])) {
            con[i][j] = k + 1;
            break;
          }
        }
      }
    }
    else {                                /*  search downward                 */
      for(j=0; j<ncn; j++) {
        for(k=nat-1; k>=0; k--) {
          if(!strcmp(cs[j], atnm[k])) {
            con[i][j] = k + 1;
            break;
          }
        }
      }
    }

                                          /*  not all the connection table    */
                                          /*  was filled : we must stop       */
    for(j=0; j<ncn; j++) if(!con[i][j]) return(MDF_ERR);

    i++;
    status = MDF_OK;                      /*  no errors so far                */
    if (i == nat) break;                  /*  the last atom has been treated  */
    status = MDF_ERR;                     /*  could not find all atoms !      */
  }

  return ( status );

} /* mdf2con() */

char pdb2mmr(char *pdbstr)
/* Translates a residue name into a one-character code.

   The comparison is exact and case sensitive.  "HOH" and "WAT" share the
   code '0'.  A name that is not in the table yields a blank (' ').
 */
{
  static const struct {
    const char *name;
    char code;
  } table[] = {
    { "ALA", 'A' }, { "ASX", 'B' }, { "CYS", 'C' }, { "ASP", 'D' },
    { "GLU", 'E' }, { "PHE", 'F' }, { "GLY", 'G' }, { "HIS", 'H' },
    { "ILE", 'I' }, { "HSE", 'J' }, { "LYS", 'K' }, { "LEU", 'L' },
    { "MET", 'M' }, { "ASN", 'N' }, { "HYP", 'O' }, { "PRO", 'P' },
    { "GLN", 'Q' }, { "ARG", 'R' }, { "SER", 'S' }, { "THR", 'T' },
    { "HYL", 'U' }, { "VAL", 'V' }, { "TRP", 'W' }, { "UNK", 'X' },
    { "TYR", 'Y' }, { "GLX", 'Z' },
    { "ORN", '1' }, { "SAR", '2' }, { "TAU", '3' }, { "THY", '4' },
    { "ALB", '5' }, { "PHO", '6' }, { "PCA", '7' }, { "ACE", '8' },
    { "FOR", '9' }, { "HOH", '0' }, { "WAT", '0' },
    { "A",   'a' }, { "C",   'b' }, { "G",   'c' }, { "U",   'd' },
    { "1MA", 'e' }, { "5MC", 'f' }, { "OMC", 'g' }, { "2MG", 'h' },
    { "M2G", 'i' }, { "7MG", 'j' }, { "OMG", 'k' }, { "5MU", 'l' },
    { "H2U", 'm' }, { "PSU", 'n' }, { "YG",  'o' }, { "T",   'p' }
  };
  size_t k;

  /* every name occurs once, so the first hit is the only one */
  for(k = 0; k < sizeof table / sizeof table[0]; k++) {
    if(strcmp(pdbstr, table[k].name) == 0) return table[k].code;
  }

  return ' ';
}




