/*
 *
 * protein.c
 * Created: 20081229 Jerome Hert
 *  
 */

#include "input.h"

/*
 * +--------------------------------------------------------------------------+
 * | Prototypes                                                               |
 * +--------------------------------------------------------------------------+
 */

/* String helper: keeps only the alphanumeric characters of a string. */
static char *stripstr(			char *string);
/* Format-specific readers, selected by input_read_protein() from the file extension. */
static void	input_read_pdb(		t_protein *protein);
static void	input_read_mol2(	t_protein *protein);
/* Single-line parsers used by the readers above. */
static void	readPDBLine(		char *line, char *PDBAtom, char *resName, char *PDBChainID, int *PDBResSeq, double *PDBX, double *PDBY, double *PDBZ, char *PDBElement);
static void readMOL2Line(		char *line, char *MOLAtom, double *MOLX, double *MOLY, double *MOLZ, char *MOLElement, int *MOLSubsID, char *resName);
static void readMOL2SubsLine(	char *line, int *subs_id, char *res_type, int *res_id, char *subs_type, char *chain);
/* Look-ups in the list of residues of interest; both return an index or MINUSONE. */
static int isInResidueList(		t_protein *protein, char chain, int residue);
static int isInSubstructureList(t_protein *protein, int subs_id);
/* Element filter applied to every atom before it is stored. */
static int isConsideredAtom(	char *element);
/* Format-specific writers, selected by align_protein_file() from the file extension. */
static void align_pdb_file(		char *infile, char *outfile, t_transfo *transfo, t_point *center);
static void align_mol2_file(	char *infile, char *outfile, t_transfo *transfo, t_point *center);

/*
 * +--------------------------------------------------------------------------+
 * | Generic functions                                                        |
 * +--------------------------------------------------------------------------+
 */

/*
 * Return a newly allocated, NUL-terminated copy of the `len` characters of
 * `string` that start at offset `begin`.
 *
 * Returns NULL when `string` is NULL or empty, when the range
 * [begin, begin+len) does not fit inside the string, or when the allocation
 * fails. The caller owns the returned buffer and must free() it.
 */
char *substr(const char *string, size_t begin, size_t len) {
	char	*substring = NULL;
	size_t	string_length;

	/* Test the pointer before strlen() dereferences it */
	if (string == NULL)
		return NULL;
	string_length = strlen(string);
	/* Written as a subtraction so that begin+len cannot wrap around */
	if (string_length == 0 || begin > string_length || len > string_length - begin)
		return NULL;
	substring = (char *)calloc(len + 1, sizeof(char));
	if (substring == NULL)
		return NULL;
	memcpy(substring, string + begin, len);
	substring[len] = '\0';
	return substring;
}

/*
 * Return a newly allocated copy of `string` that contains only its
 * alphanumeric characters (as reported by isalnum()), in their original order.
 *
 * Returns NULL when `string` is NULL or when the allocation fails.
 * The caller owns the returned buffer and must free() it.
 */
static char *stripstr(char *string) {
	size_t	i, kept = 0, length;
	char	*string_stripped;

	if (string == NULL)
		return NULL;
	length = strlen(string);
	/* The result can never be longer than the input, so one pass is enough */
	string_stripped = (char *)calloc(length + 1, sizeof(char));
	if (string_stripped == NULL)
		return NULL;
	for (i = 0; i < length; i++) {
		/* <ctype.h> functions require a value representable as unsigned char */
		if (isalnum((unsigned char)string[i]))
			string_stripped[kept++] = string[i];
	}
	string_stripped[kept] = '\0';
	return string_stripped;
}

/*
 * +--------------------------------------------------------------------------+
 * | FUNCTION FOR INPUT FILE                                                  |
 * +--------------------------------------------------------------------------+
 */

/*
 * Tell whether a line of the input file must be skipped.
 * A line is skipped when it is empty or when its first character is '#',
 * a newline or a space. Returns TRUE for such lines, FALSE otherwise.
 */
int input_is_comment(char *line) {
	switch (line[ZERO]) {
		case '\0':
		case '#':
		case '\n':
		case ' ':
			return TRUE;
		default:
			return FALSE;
	}
}

/*
 * Parse one line of the input file: its first two whitespace-separated
 * tokens are stored in protein->file_protein and protein->file_residues.
 * NOTE(review): "%s" is unbounded; the destination sizes are declared
 * outside this file, so confirm they can hold a full line.
 */
void input_process_line(char *line, t_protein *protein) {
	char *protein_path	= protein->file_protein;
	char *residues_path	= protein->file_residues;

	sscanf(line, "%s %s", protein_path, residues_path);
}

/*
 * Write the protein file name and the residue file name of `protein`
 * to `fh` as a single tab-separated "WARNING:" line.
 */
void input_print(FILE *fh, t_protein *protein) {
	const char *protein_path	= protein->file_protein;
	const char *residues_path	= protein->file_residues;

	fprintf(fh, "WARNING:\t%s\t%s\n", protein_path, residues_path);
}

/*
 * +--------------------------------------------------------------------------+
 * | FUNCTION FOR RESIDUE FILE                                                |
 * +--------------------------------------------------------------------------+
 */

/*
 * Load the list of residues of interest from protein->file_residues.
 *
 * Every line that is not skipped by input_is_comment() is parsed as
 * "<chain character> <residue number>"; a chain written as '-' is stored as
 * the blank chain ' '. On return protein->size holds the number of entries
 * and protein->residue points to a calloc()ed array of that many residues,
 * each set up with residue_initialize().
 *
 * The program exits with an error message when the file cannot be opened,
 * when the array cannot be allocated or when a line cannot be parsed.
 */
void input_read_residues(t_protein *protein) {

	int		i, number_of_lines = 0;
	char	line[MAXLINE];
	FILE	*fh = NULL;

	/* Open file handle */
	if ( ( fh = fopen(protein->file_residues, "r") ) == NULL ) {
		fprintf(stderr, "ERROR:\tCannot open file %s for reading\n", protein->file_residues);
		exit(1);
	}

	/* First pass: count the residues so that the array can be sized */
	while ( fgets(line, MAXLINE, fh) != NULL ) {
		if ( input_is_comment(line) == FALSE )
			++number_of_lines;
	}

	/* Allocate memory (calloc may legitimately return NULL for zero elements) */
	protein->size		= number_of_lines;
	protein->residue	= (t_residue *)calloc(protein->size, sizeof(t_residue));
	if ( protein->residue == NULL && protein->size > ZERO ) {
		fprintf(stderr, "ERROR:\tCannot allocate memory for the residues of file %s\n", protein->file_residues);
		exit(1);
	}

	/* Second pass: rewind and fill the array */
	rewind(fh);
	i = ZERO;
	while ( i < protein->size && fgets(line, MAXLINE, fh) != NULL ) {
		if ( input_is_comment(line) == FALSE ) {
			residue_initialize( &protein->residue[i] );
			if ( sscanf(line, "%c %d", &protein->residue[i].chain, &protein->residue[i].id ) != 2 ) {
				/* Drop the end of line so that the message stays on one line */
				line[strcspn(line, "\r\n")] = '\0';
				fprintf(stderr, "ERROR:\tCannot read a chain and a residue number from line \"%s\" of file %s\n", line, protein->file_residues);
				exit(1);
			}
			/* '-' is the on-disk spelling of the blank chain identifier */
			if (protein->residue[i].chain == '-')
				protein->residue[i].chain = ' ';
			++i;
		}
	}

	/* Close file handle and exit */
	fclose(fh);
	return;
}

/*
 * Write the chain identifier and the residue number of every residue of
 * `protein` to `fh`, one tab-separated "WARNING" line per residue.
 */
void input_print_residues(FILE *fh, t_protein *protein) {
	int i;
	for (i=ZERO;i<protein->size;i++) {
		/* chain is a single character, not a string: print it with %c */
		fprintf(fh, "WARNING (input_print_residues):\t%c\t%d\n", protein->residue[i].chain, protein->residue[i].id);
	}
	return;
}

 /*
 * +--------------------------------------------------------------------------+
 * | FUNCTION FOR PROTEIN FILE                                                |
 * +--------------------------------------------------------------------------+
 */

/*
 * Read the protein file named by protein->file_protein.
 *
 * The reader is chosen from the end of the file name: names ending with
 * EXT_PDB go to input_read_pdb(), names ending with EXT_MOL2 go to
 * input_read_mol2(). Any other name is reported on stderr and the program
 * exits.
 */
void input_read_protein(t_protein *protein) {
	char	PDB_EXT[EXT_PDB_SIZE]	= EXT_PDB;
	char	MOL2_EXT[EXT_MOL2_SIZE]	= EXT_MOL2;
	size_t	name_len				= strlen(protein->file_protein);
	size_t	pdb_len					= strlen(PDB_EXT);
	size_t	mol2_len				= strlen(MOL2_EXT);

	/*
	 * Each extension is only compared when the name is at least as long as
	 * that extension, so the suffix offset can never underflow.
	 */
	if ( name_len >= pdb_len && strcmp(protein->file_protein + (name_len - pdb_len), PDB_EXT) == ZERO )
		input_read_pdb(protein);
	else if ( name_len >= mol2_len && strcmp(protein->file_protein + (name_len - mol2_len), MOL2_EXT) == ZERO )
		input_read_mol2(protein);
	else {
		fprintf(stderr, "ERROR:\tDon't know the format of file \"%s\".\n", protein->file_protein);
		exit(1);
	}
	return;
}

/*
 * Read the atoms of the residues of interest from the PDB file named by
 * protein->file_protein.
 *
 * Works on a temporary copy of the protein: every "ATOM" record is parsed
 * with readPDBLine(), matched against the residue list with
 * isInResidueList(), and stored when isConsideredAtom() accepts its element.
 * The temporary protein is copied back into `protein` at the end.
 *
 * The program exits when the file cannot be opened, when a listed residue
 * appears with two different names, when a residue reaches MAXATOM atoms,
 * or when a listed residue received no atom.
 */
static void input_read_pdb(t_protein *protein) {

	int			i;
	char		line[MAXLINE];
	char		PDBAtom[MAX_ATOM_TYPE_LONG];
	char		resName[MAX_RESIDUE_TYPE_LONG];
	char		PDBChainID = '\0',	OLDChainID = '?';
	int			PDBResSeq = ZERO,	OLDResSeq = QUADNINES;
	double		PDBX, PDBY, PDBZ;
	char		PDBElement[PDBElementLength+ONE];
	int			isConsideredResidue	= MINUSONE;
	int			nb_residue_not_found= ZERO;
	t_protein	*tmp_protein		= NULL;
	FILE		*fh = NULL;

	/* Allocate memory: one atom array of MAXATOM entries per listed residue */
	/* NOTE(review): none of the calloc() results below is checked */
	tmp_protein					= (t_protein *)calloc(ONE, sizeof(t_protein));
	protein_initialize(tmp_protein);
	tmp_protein->residue 		= (t_residue *)calloc(protein->size, sizeof(t_residue));
	protein_copy_infos(tmp_protein, protein);
	for(i=ZERO; i<tmp_protein->size;i++)
		tmp_protein->residue[i].atom	= (t_atom *)calloc(MAXATOM, sizeof(t_atom));

	/* Open file handle */
	if ( ( fh = fopen(protein->file_protein, "r") ) == NULL ) {
		fprintf(stderr, "ERROR:\tCannot open file %s for reading\n", protein->file_protein);
		exit(1);
	}

	/* Read PDB */
	line[0]		= '\0';
	/* Sentinels that differ from the first record, forcing a residue look-up */
	OLDChainID	= '?';
	OLDResSeq	= QUADNINES;
	while ( fgets(line, MAXLINE, fh) != NULL ) {
		/* Only ATOM records are read; every other record type is skipped */
		if ( strncmp(line, "ATOM", FOUR) == ZERO)
		{
			/* Reset the per-line fields before parsing */
			PDBAtom[ZERO]	= resName[ZERO] = PDBChainID = PDBElement[ZERO] = '\0';
			PDBResSeq		= ZERO;
			PDBX			= PDBY = PDBZ = 0.;
			/* Read the information */
			readPDBLine(line, PDBAtom, resName, &PDBChainID, &PDBResSeq, &PDBX, &PDBY, &PDBZ, PDBElement);
			/* The residue look-up is only redone when the chain or the residue number changes */
			if (PDBChainID != OLDChainID || PDBResSeq != OLDResSeq) {
				/*
				 * Get the index of the corresponding residue if residue considered
				 * Returns -1 if the residue is not in the list of considered residues
				 */
				isConsideredResidue = isInResidueList(tmp_protein, PDBChainID, PDBResSeq);
				if ( isConsideredResidue > MINUSONE ) {
					/* A residue that already has a name must keep the same name */
					if ( tmp_protein->residue[isConsideredResidue].type_long[ZERO] != '\0' && strncmp(tmp_protein->residue[isConsideredResidue].type_long, resName, MAX_RESIDUE_TYPE_LONG) != ZERO ) {
						fprintf(stderr, "ERROR:\tAmbiguous PDB input. Residue %c %s%d was found twice in file \"%s\"\n", PDBChainID, resName, PDBResSeq, protein->file_protein);
						exit(1);
					}
					residue_populate(&tmp_protein->residue[isConsideredResidue], resName, PDBChainID, PDBResSeq);
				}
			}
			/* If is in the list */
			if ( isConsideredResidue > MINUSONE ) {
				/* Copy Atom */
				/* NOTE(review): the capacity test runs before the element filter, so a full residue followed by an ignored atom also exits */
				if (tmp_protein->residue[isConsideredResidue].size == MAXATOM) {
					fprintf(stderr, "ERROR:\tExceeding the maximum number of atoms for residue %c %s%d in PDB file \"%s\"\n", PDBChainID, resName, PDBResSeq, tmp_protein->file_protein);
					exit(1);
				}
				if (isConsideredAtom(PDBElement)) {
					atom_populate(&tmp_protein->residue[isConsideredResidue].atom[tmp_protein->residue[isConsideredResidue].size], PDBAtom, PDBX, PDBY, PDBZ, PDBElement);
					tmp_protein->residue[isConsideredResidue].size++;
				}
			}
			OLDChainID		= PDBChainID;
			OLDResSeq		= PDBResSeq;
		}
		line[ZERO] = '\0';
	}
	/* Every listed residue must have received at least one atom */
	nb_residue_not_found = ZERO;
	for (i=ZERO;i<tmp_protein->size;i++) {
		if ( tmp_protein->residue[i].size == 0 ) {
			fprintf(stderr, "ERROR:\tResidue %c %d is missing in file \"%s\".\n", tmp_protein->residue[i].chain, tmp_protein->residue[i].id, tmp_protein->file_protein);
			nb_residue_not_found++;
		}
	}
	if (nb_residue_not_found) {
		fprintf(stderr, "ERROR:\tNot all residues were found in PDB file \"%s\".\n", tmp_protein->file_protein);
		exit(1);
	}
	/* Close file handle and exit */
	fclose(fh);
	/* Copy tmp_protein to protein */
	protein_copy(protein, tmp_protein);
	/* free tmp_protein */
	protein_free(tmp_protein);
	free(tmp_protein);
	return;
}

/*
 * Read the atoms of the residues of interest from the SYBYL MOL2 file named
 * by protein->file_protein.
 *
 * The file is scanned twice on a temporary copy of the protein:
 *   1. the SUBSTRUCTURE section links each listed residue (chain + number)
 *      to its substructure id, which is kept in the residue's Ca field;
 *   2. the ATOM section is then read and every atom whose substructure id
 *      belongs to a listed residue, and whose element isConsideredAtom()
 *      accepts, is stored.
 * The temporary protein is copied back into `protein` at the end.
 *
 * The program exits when the file cannot be opened, when it has no
 * SUBSTRUCTURE section, when a residue is ambiguous, when a residue reaches
 * MAXATOM atoms, or when a listed residue received no atom.
 */
static void input_read_mol2(t_protein *protein) {

	int			i;
	int			in_substructure_section	= MINUSONE;
	int			in_atom_section			= MINUSONE;
	int			isConsideredResidue		= MINUSONE;
	int			nb_residue_not_found	= ZERO;
	int			MOLResSeq 				= ZERO;
	int			MOLSubsID				= ZERO;
	int			OLDSubsID				= QUADNINES;
	double		MOLX = 0., MOLY = 0., MOLZ = 0.;
	char		MOLChainID 				= '\0';
	char		MOLSubsType[MAXCHAR];
	char		MOLAtom[MAX_ATOM_TYPE_LONG];
	char		resName[MAX_RESIDUE_TYPE_LONG];
	char		MOLElement[PDBElementLength+ONE];
	char		line[MAXLINE];
	t_protein	*tmp_protein			= NULL;
	FILE		*fh						= NULL;

	/* Allocate memory: one atom array of MAXATOM entries per listed residue */
	tmp_protein					= (t_protein *)calloc(ONE, sizeof(t_protein));
	protein_initialize(tmp_protein);
	tmp_protein->residue 		= (t_residue *)calloc(protein->size, sizeof(t_residue));
	protein_copy_infos(tmp_protein, protein);
	for(i=ZERO; i<tmp_protein->size;i++)
		tmp_protein->residue[i].atom	= (t_atom *)calloc(MAXATOM, sizeof(t_atom));

	/* Open file handle */
	if ( ( fh = fopen(tmp_protein->file_protein, "r") ) == NULL ) {
		fprintf(stderr, "ERROR:\tCannot open file %s for reading\n", tmp_protein->file_protein);
		exit(1);
	}

	/*
	 * First we need to get the substructure section of the MOL2
	 * This section is compulsory as it is the only place providing
	 * the chain and id number. These are linked to a substructure id
	 * (subst_id) in the atom section that is the way of identifying a
	 * residue in a MOL2 file
	 */
	in_substructure_section = MINUSONE;
	while(fgets(line, MAXLINE, fh) != NULL) {
		if ( strncmp(line, "@<TRIPOS>SUBSTRUCTURE", TWENTYONE) == ZERO )
			in_substructure_section = TRUE;
		else if ( in_substructure_section == TRUE && strncmp(line, "@<TRIPOS>", EIGHT) == ZERO ) {
			/* Another section starts: the substructure section is over */
			in_substructure_section = FALSE;
			break;
		}
		else if ( in_substructure_section == TRUE ) {
			/* Reset the text fields so that an unparsable line is never compared as garbage */
			MOLSubsType[ZERO]	= resName[ZERO] = '\0';
			readMOL2SubsLine(line, &MOLSubsID, resName, &MOLResSeq, MOLSubsType, &MOLChainID);
			isConsideredResidue	= MINUSONE;
			if (strcmp(MOLSubsType, "RESIDUE") == 0)
				isConsideredResidue	= isInResidueList(tmp_protein, MOLChainID, MOLResSeq);
			if (isConsideredResidue != MINUSONE) {
				/* The Ca field temporarily stores the substructure id of the residue */
				if ( tmp_protein->residue[isConsideredResidue].Ca != MINUSONE && tmp_protein->residue[isConsideredResidue].Ca != MOLSubsID ) {
					fprintf(stderr, "ERROR:\tAmbiguous SYBMOL2 input. Residue %c %d (Subs %d) was found twice in file \"%s\"\n", MOLChainID, MOLResSeq, MOLSubsID, tmp_protein->file_protein);
					exit(1);
				}
				residue_populate(&tmp_protein->residue[isConsideredResidue], resName, MOLChainID, MOLResSeq);
				tmp_protein->residue[isConsideredResidue].Ca = MOLSubsID;
			}
		}
	}
	/*
	 * Cannot read mol2 format without the substructure section.
	 * The flag is still MINUSONE only when the section header was never met;
	 * it stays TRUE when the section is the last one of the file, which is valid.
	 */
	if (in_substructure_section == MINUSONE ) {
		fprintf(stderr, "ERROR:\tUnable to find the Substructure section in SYBMOL2 file \"%s\".\n", tmp_protein->file_protein);
		exit(1);
	}

	/* Read the Atom section of the MOL2 File */
	fseek(fh, 0L, SEEK_SET);
	line[0]				= '\0';
	in_atom_section		= MINUSONE;
	isConsideredResidue	= MINUSONE;
	OLDSubsID			= QUADNINES;
	while(fgets(line, MAXLINE, fh) != NULL) {
		if ( strncmp(line, "@<TRIPOS>ATOM", THIRTEEN) == ZERO )
			in_atom_section = TRUE;
		else if ( in_atom_section == TRUE && strncmp(line, "@<TRIPOS>", EIGHT) == ZERO ) {
			in_atom_section = FALSE;
			break;
		}
		else if ( in_atom_section == TRUE ) {
			/* Reset the per-line fields before parsing */
			MOLAtom[ZERO]	= resName[ZERO] = MOLElement[ZERO] = '\0';
			MOLX			= MOLY = MOLZ = 0.;
			/* Read the line information */
			readMOL2Line(line, MOLAtom, &MOLX, &MOLY, &MOLZ, MOLElement, &MOLSubsID, resName);
			/* The residue look-up is only redone when the substructure id changes */
			if ( MOLSubsID != OLDSubsID ) {
				/*
				 * Get the index of the corresponding residue if residue considered
				 * Returns -1 if the residue is not in the list of considered residues
				 */
				isConsideredResidue = isInSubstructureList(tmp_protein, MOLSubsID);
				/* Check if the residue information is correct */
				if ( isConsideredResidue > MINUSONE )
					if ( strncmp(tmp_protein->residue[isConsideredResidue].type_long, resName, MAX_RESIDUE_TYPE_LONG) != ZERO ) {
						fprintf(stderr, "ERROR:\tAmbiguous SYBMOL2 input. Residue %c %s%d (Subs %d) was found twice in file \"%s\"\n", tmp_protein->residue[isConsideredResidue].chain, tmp_protein->residue[isConsideredResidue].type_long, tmp_protein->residue[isConsideredResidue].id, MOLSubsID, tmp_protein->file_protein);
						exit(1);
					}
			}
			/* If is in the list */
			if ( isConsideredResidue > MINUSONE ) {
				/* Copy Atom */
				if (tmp_protein->residue[isConsideredResidue].size == MAXATOM) {
					fprintf(stderr, "ERROR:\tExceeding the maximum number of atoms for residue %c %s%d (Subs %d) in SYBMOL2 file \"%s\"\n", tmp_protein->residue[isConsideredResidue].chain, tmp_protein->residue[isConsideredResidue].type_long, tmp_protein->residue[isConsideredResidue].id, MOLSubsID, tmp_protein->file_protein);
					exit(1);
				}
				if (isConsideredAtom(MOLElement)) {
					atom_populate(&tmp_protein->residue[isConsideredResidue].atom[tmp_protein->residue[isConsideredResidue].size], MOLAtom, MOLX, MOLY, MOLZ, MOLElement);
					tmp_protein->residue[isConsideredResidue].size++;
				}
			}
			OLDSubsID = MOLSubsID;
		}
		line[ZERO] = '\0';
	}
	/* Every listed residue must have received at least one atom */
	nb_residue_not_found = ZERO;
	for (i=ZERO;i<tmp_protein->size;i++) {
		if ( tmp_protein->residue[i].size == 0 ) {
			fprintf(stderr, "ERROR:\tResidue %c %d (Subs %d) is missing in file \"%s\".\n", tmp_protein->residue[i].chain, tmp_protein->residue[i].id, tmp_protein->residue[i].Ca, tmp_protein->file_protein);
			nb_residue_not_found++;
		}
	}
	if (nb_residue_not_found) {
		fprintf(stderr, "ERROR:\tNot all residues were found in SYBMOL2 file \"%s\".\n", tmp_protein->file_protein);
		exit(1);
	}
	/* Close file handle and exit */
	fclose(fh);
	/* Copy tmp_protein to protein */
	protein_copy(protein, tmp_protein);
	/* free tmp_protein */
	protein_free(tmp_protein);
	free(tmp_protein);
	return;
}

static void readPDBLine(char *line, char *PDBAtom, char *resName, char *PDBChainID, int *PDBResSeq, double *PDBX, double *PDBY, double *PDBZ, char *PDBElement) {
	int length;
	char *pointer;
	/* Atom type (long) */
	pointer = substr(line, PDBAtomIndex, PDBAtomLength);
	length = strlen(pointer) < MAX_ATOM_TYPE_LONG ? strlen(pointer) + ONE : MAX_ATOM_TYPE_LONG;
	strncpy(PDBAtom, pointer, length);
	free(pointer);
	pointer = stripstr(PDBAtom);
	length = strlen(pointer) < MAX_ATOM_TYPE_LONG ? strlen(pointer) + ONE : MAX_ATOM_TYPE_LONG;
	strncpy(PDBAtom, pointer, length);
	free(pointer);
	/* Residue Name */
	pointer = substr(line, PDBResNameIndex, PDBResNameLength);
	length = strlen(pointer) < MAX_RESIDUE_TYPE_LONG ? strlen(pointer) + ONE : MAX_RESIDUE_TYPE_LONG;
	strncpy(resName, pointer, length);
	free(pointer);
	pointer = stripstr(resName);
	length = strlen(pointer) < MAX_RESIDUE_TYPE_LONG ? strlen(pointer) + ONE : MAX_RESIDUE_TYPE_LONG;
	strncpy(resName, pointer, length);
	free(pointer);
	/* Chain Identifier */
	pointer = substr(line, PDBChainIDIndex, PDBChainIDLength);
	*PDBChainID = pointer[0];
	free(pointer);
	/* Residue Number */
	pointer = substr(line, PDBResSeqIndex, PDBResSeqLength);
	*PDBResSeq = atoi(pointer);
	free(pointer);
	/* X Coordinate */
	pointer = substr(line, PDBXIndex, PDBCoordLength);
	*PDBX = atof(pointer);
	free(pointer);
	/* Y Coordinate */
	pointer = substr(line, PDBYIndex, PDBCoordLength);
	*PDBY = atof(pointer);
	free(pointer);
	/* Z Coordinate */
	pointer = substr(line, PDBZIndex, PDBCoordLength);
	*PDBZ = atof(pointer);
	free(pointer);
	/* Element */
	pointer = substr(line, PDBElementIndex, PDBElementLength);
	length = strlen(pointer) < PDBElementLength+ONE ? strlen(pointer) + ONE : PDBElementLength+ONE;
	printf("");
	strncpy(PDBElement, pointer, length);
	free(pointer);
	pointer = stripstr(PDBElement);
	length = strlen(pointer) < PDBElementLength+ONE ? strlen(pointer) + ONE : PDBElementLength+ONE;
	strncpy(PDBElement, pointer, length);
	free(pointer);
	return;
}

static void readMOL2Line(char *line, char *MOLAtom, double *MOLX, double *MOLY, double *MOLZ, char *MOLElement, int *MOLSubsID, char *resName) {
	int i, res_type_length = ZERO;
	char TMPElement[MAXCHAR], TMPresName[MAX_RESIDUE_TYPE_LONG];
	sscanf(line, "%*s %s %lf %lf %lf %s %d %s", MOLAtom, MOLX, MOLY, MOLZ, TMPElement, MOLSubsID, TMPresName);
	for(i=ZERO;i<strlen(TMPresName);i++) {
		if ( isspace(TMPresName[i]) == 0 && isdigit(TMPresName[i]) == 0 )	res_type_length++;
		else break;
	}
	res_type_length = res_type_length < MAX_RESIDUE_TYPE_LONG ? res_type_length : MAX_RESIDUE_TYPE_LONG - ONE;
	strncpy(resName, TMPresName, res_type_length);
	resName[res_type_length]	= '\0';
	MOLElement[ZERO]			= TMPElement[ZERO];
	MOLElement[ONE]				= '\0';
	return;
}

static void readMOL2SubsLine(char *line, int *subs_id, char *res_type, int *res_id, char *subs_type, char *chain) {
	int i, res_type_length = ZERO;
	char tmp[MAXCHAR];
	sscanf(line, "%d %s %*s %s %*s %c", subs_id, tmp, subs_type, chain);
	for(i=ZERO;i<strlen(tmp);i++) {
		if (isspace(tmp[i]) == 0 && isdigit(tmp[i]) == 0) res_type_length++;
		else break;
	}		
	sscanf(&tmp[res_type_length], "%d", res_id);
	res_type_length = res_type_length < MAX_RESIDUE_TYPE_LONG ? res_type_length : MAX_RESIDUE_TYPE_LONG - ONE;
	strncpy(res_type, tmp, res_type_length);
	res_type[res_type_length]	= '\0';
	return;
}

/*
 * Look for the residue identified by (chain, residue number) in the residue
 * list of `protein`. Returns the index of the first match, or MINUSONE when
 * there is none.
 */
static int isInResidueList(t_protein *protein, char chain, int residue) {
	int position = ZERO;

	while (position < protein->size) {
		if (protein->residue[position].chain == chain && protein->residue[position].id == residue)
			return position;
		position++;
	}
	return MINUSONE;
}

/*
 * Look for the residue whose Ca field equals `subs_id` in the residue list
 * of `protein`. Returns the index of the first match, or MINUSONE when there
 * is none.
 */
static int isInSubstructureList(t_protein *protein, int subs_id) {
	int position = ZERO;

	while (position < protein->size) {
		if (protein->residue[position].Ca == subs_id)
			return position;
		position++;
	}
	return MINUSONE;
}

/*
 * Tell whether an atom of the given element must be stored.
 * Returns TRUE for "C", "N", "O" and "S". Returns FALSE for anything else;
 * elements other than "H" and "L" additionally trigger a warning on stderr.
 */
static int isConsideredAtom(char *element) {
	static const char *const kept[] = { "C", "N", "O", "S" };
	size_t k;

	for (k = 0; k < sizeof(kept) / sizeof(kept[0]); k++) {
		if (strcmp(element, kept[k]) == 0)
			return TRUE;
	}
	/* "H" and "L" are dropped silently, every other element is reported */
	if (strcmp(element, "H") != 0 && strcmp(element, "L") != 0)
		fprintf(stderr, "WARNING:\tDon't know what to do with element \"%s\". Ignoring for now!\n", element);
	return FALSE;
}

 /*
 * +--------------------------------------------------------------------------+
 * | FUNCTION FOR OUTPUT                                                      |
 * +--------------------------------------------------------------------------+
 */

/*
 * Write the score table to `fh`: a header line, then one tab-separated line
 * holding the protein file name, its residue count, the four distances
 * (d1..d4) and the four counts (N, N2, N3, N4) of alignment->score, then an
 * empty line.
 */
void output_print_score(FILE *fh, t_protein *protein, t_alignment *alignment) {
	fputs("Target\tResidue_count\tDistance1\tDistance2\tDistance3\tDistance4\tN\tN2\tN3\tN4\n", fh);
	fprintf(fh, "%s\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%d\t%d\t%d\t%d\n",
		protein->file_protein, protein->size,
		alignment->score.d1, alignment->score.d2, alignment->score.d3, alignment->score.d4,
		alignment->score.N, alignment->score.N2, alignment->score.N3, alignment->score.N4);
	fputs("\n", fh);
}

/*
 * Write the site center and the transformation to `fh`, using point_print()
 * and transfo_print(), followed by an empty line.
 */
void output_print_transfo(	FILE *fh,	t_point *center, t_transfo *transfo) {
	fputs("Site center\t:\t", fh);
	point_print(fh, center);
	transfo_print(fh, transfo);
	fputs("\n", fh);
}

/*
 * Write, for every triangle set in both fingerprint maps, the pair of
 * residues mapped to it, their local distance (1 - fp_compare()) and the
 * distance between the atoms indexed by each residue's Cb field.
 *
 * Both proteins are copied, prepared with protein_prepare_d3_d4() and
 * transformed with their alignment's transformation before the distances
 * are computed; the copies are released before returning. A blank chain is
 * printed as '-'.
 */
void output_print_dist_local(FILE *fh, t_protein  *protein1, t_alignment *alignment1, t_protein *protein2, t_alignment *alignment2) {
	int			triangle;
	t_fp		*map1	= alignment1->map;
	t_fp		*map2	= alignment2->map;
	t_protein	*p1		= (t_protein *)calloc(ONE, sizeof(t_protein));
	t_protein	*p2		= (t_protein *)calloc(ONE, sizeof(t_protein));

	protein_copy(p1, protein1);
	protein_copy(p2, protein2);
	protein_prepare_d3_d4(p1);
	protein_prepare_d3_d4(p2);
	protein_transform_ca_cb_cm(p1, &alignment1->transfo);
	protein_transform_ca_cb_cm(p2, &alignment2->transfo);

	fprintf(fh, "Reference\t:\tTarget\tLocal distance\tDistance between rotamer specific side chain atoms\n");
	for (triangle = ZERO; triangle < ICOSA_NUM_TRIANGLES; triangle++) {
		t_residue	*res1, *res2;
		char		chain1, chain2;

		/* Only triangles occupied in both maps are reported */
		if ( map1[triangle][FP_BITS] != TRUE || map2[triangle][FP_BITS] != TRUE )
			continue;
		res1	= &p1->residue[ map1[triangle][FP_RESIDUE_INDEX] ];
		res2	= &p2->residue[ map2[triangle][FP_RESIDUE_INDEX] ];
		chain1	= res1->chain;
		if (chain1 == ' ')
			chain1 = '-';
		chain2	= res2->chain;
		if (chain2 == ' ')
			chain2 = '-';
		fprintf(fh, "%c%-4d [%c]\t:\t%c%-4d [%c]\t%.4f\t%.4f\n",
			res1->type, res1->id, chain1,
			res2->type, res2->id, chain2,
			1 - fp_compare(&map1[triangle], &map2[triangle]),
			point_distance( &res1->atom[res1->Cb].coord, &res2->atom[res2->Cb].coord )
		);
	}
	fprintf(fh, "\n");

	protein_free(p1);
	protein_free(p2);
	free(p1);
	free(p2);
}

/*
 * Write both fingerprint maps side by side to `fh`, one line per triangle.
 * Each line holds the 1-based triangle number, then for each site the
 * residue mapped to the triangle (or "empty") followed by the first
 * FP_SIZE_SCORE values of its fingerprint, comma-separated. A blank chain is
 * printed as '-'. The table ends with an empty line.
 */
void output_print_map(FILE *fh,  t_protein  *protein1, t_fp *map1, t_protein *protein2, t_fp *map2) {
	int triangle, bit;
	char chain;

	fprintf(fh, "Full Fingerprint\tSite1\tSite2\n");
	for (triangle = ZERO; triangle < ICOSA_NUM_TRIANGLES; triangle++) {
		/* Triangle ID */
		fprintf(fh, "%d\t", triangle + 1);

		/* First site */
		if (map1[triangle][FP_BITS] != TRUE) {
			fprintf(fh, "empty     : ");
		}
		else {
			chain = protein1->residue[ map1[triangle][FP_RESIDUE_INDEX] ].chain;
			if (chain == ' ')
				chain = '-';
			fprintf(fh, "%c%-4d [%c] : ",
				protein1->residue[ map1[triangle][FP_RESIDUE_INDEX] ].type,
				protein1->residue[ map1[triangle][FP_RESIDUE_INDEX] ].id,
				chain
			);
		}
		for (bit = 0; bit < FP_SIZE_SCORE; bit++) {
			/* The separator goes before every value but the first */
			if (bit > 0)
				fprintf(fh, ",");
			fprintf(fh, "%d", map1[triangle][bit]);
		}

		/* Second site */
		if (map2[triangle][FP_BITS] != TRUE) {
			fprintf(fh, "\tempty     : ");
		}
		else {
			chain = protein2->residue[ map2[triangle][FP_RESIDUE_INDEX] ].chain;
			if (chain == ' ')
				chain = '-';
			fprintf(fh, "\t%c%-4d [%c] : ",
				protein2->residue[ map2[triangle][FP_RESIDUE_INDEX] ].type,
				protein2->residue[ map2[triangle][FP_RESIDUE_INDEX] ].id,
				chain
			);
		}
		for (bit = 0; bit < FP_SIZE_SCORE; bit++) {
			if (bit > 0)
				fprintf(fh, ",");
			fprintf(fh, "%d", map2[triangle][bit]);
		}
		fprintf(fh, "\n");
	}
	fprintf(fh, "\n");
}

/*
 * Write a transformed copy of the structure file `infile`.
 *
 * The writer is chosen from the end of the input file name: names ending
 * with EXT_PDB go to align_pdb_file(), names ending with EXT_MOL2 go to
 * align_mol2_file(). Any other name is reported on stderr and the program
 * exits. `outfile`, `transfo` and `center` are passed through unchanged.
 */
void align_protein_file(char *infile, char *outfile, t_transfo *transfo, t_point *center) {
	char	PDB_EXT[EXT_PDB_SIZE]	= EXT_PDB;
	char	MOL2_EXT[EXT_MOL2_SIZE]	= EXT_MOL2;
	size_t	name_len				= strlen(infile);
	size_t	pdb_len					= strlen(PDB_EXT);
	size_t	mol2_len				= strlen(MOL2_EXT);

	/*
	 * Each extension is only compared when the name is at least as long as
	 * that extension, so the suffix offset can never underflow.
	 */
	if ( name_len >= pdb_len && strcmp(infile + (name_len - pdb_len), PDB_EXT) == ZERO )
		align_pdb_file(infile, outfile, transfo, center);
	else if ( name_len >= mol2_len && strcmp(infile + (name_len - mol2_len), MOL2_EXT) == ZERO )
		align_mol2_file(infile, outfile, transfo, center);
	else {
		fprintf(stderr, "ERROR:\tDon't know the format of file \"%s\".\n", infile);
		exit(1);
	}
	return;
}

static void align_pdb_file(char *infile, char *outfile, t_transfo *transfo, t_point *center) {
	int 		i, j;
	char 		*pointer, line[MAXLINE];
 	FILE 		*fh_in = NULL, *fh_out = NULL;
 	double		x, y, z;
	t_point		point_tmp;
	t_point		center_translation					= {ZERO,ZERO,ZERO};
	t_angle		center_rotation						= {ZERO,ZERO,ZERO};
	t_transfo	center_transfo;
	char		*coord_tmp							= NULL;
 	char 		sitealign_version[MAXCHAR]			= SITEALIGN_VERSION;
 	char 		PDB_EXT[EXT_PDB_SIZE]				= EXT_PDB;
 	char 		PDB_EXT_ALIGN[EXT_PDB_ALIGN_SIZE]	= EXT_PDB_ALIGN; 
 	char 		*ext;

	/* Create transformation struc to place site center at the origin*/
	point_set(&center_translation, -center->x, -center->y, -center->z);
	transfo_set(&center_transfo, center_rotation, center_translation);

	if (outfile == NULL) {
		/*
		 * Create a filename for the output
		 * We want too write the file in the current directory (get rid of all dirs in name)
		 * ".pdb" , if exists, is replaced by "_sa_aligned.pdb"
		 * otherwise "_sa_aligned.pdb" is appended to the name.
		 */
		j = ZERO;
		outfile = (char *)calloc(strlen(infile) + MAXCHAR, sizeof(char));
		for (i=strlen(infile); i>ZERO; i--)
			if (infile[i-ONE] == '/') {
				j = i;
				break;
			}
		strncpy(outfile, infile + j, strlen(infile) - j);
		ext = substr(outfile, strlen(outfile) - strlen(PDB_EXT), strlen(PDB_EXT));
		if (strncmp(ext, PDB_EXT, strlen(PDB_EXT)) == ZERO) {
			strncpy(outfile + strlen(outfile) - strlen(PDB_EXT), PDB_EXT_ALIGN, sizeof(PDB_EXT_ALIGN));
		}
		else {
			strncpy(outfile + strlen(outfile), PDB_EXT_ALIGN, sizeof(PDB_EXT_ALIGN));
		}
		free(ext);
	}

	if( ( fh_in  = fopen(infile,  "r") ) == NULL ) {
		fprintf(stderr, "ERROR:\tCannot open %s for reading", infile);
		exit(ONE);
	}
	if( ( fh_out = fopen(outfile, "w") ) == NULL ) {
		fprintf(stderr, "ERROR:\tCannot open %s for reading", outfile);
		exit(ONE);
	}

	/* Print comment lines indicating the transformation */
	fprintf(fh_out, "REMARK    Modified by SiteAlign Version %s\n", sitealign_version);
	fprintf(fh_out, "REMARK    Site center: ");
	point_print(fh_out, center);
	fprintf(fh_out, "REMARK    Translation : ");
	point_print(fh_out, &transfo->translation);
	fprintf(fh_out, "REMARK    Rotation angle : ");
	angle_print(fh_out, &transfo->rotation_angle);

	coord_tmp = (char *)calloc(PDBCoordLength, sizeof(char));
	while(fgets(line, MAXLINE, fh_in) != NULL ) {
		if ( strncmp(line, "ATOM", FOUR) == ZERO || strncmp(line, "HETATM", SIX) == ZERO) {	
			/* X Coordinate */
			pointer = substr(line, PDBXIndex, PDBCoordLength);
			x = atof(pointer);
			free(pointer);
			/* Y Coordinate */
			pointer = substr(line, PDBYIndex, PDBCoordLength);
			y = atof(pointer);
			free(pointer);
			/* Z Coordinate */
			pointer = substr(line, PDBZIndex, PDBCoordLength);
			z = atof(pointer);
			free(pointer);
			/* Re-center coordinate and rotation + translation */
			point_set(&point_tmp, x, y, z);
			point_transform(&point_tmp, &center_transfo);
			point_transform(&point_tmp, transfo);
			/* Substitute in the coordinates in the line */
			sprintf(coord_tmp, "%8.3f", point_tmp.x);
			strncpy(line + PDBXIndex, coord_tmp, PDBCoordLength);
			sprintf(coord_tmp, "%8.3f", point_tmp.y);
			strncpy(line + PDBYIndex, coord_tmp,  PDBCoordLength);
			sprintf(coord_tmp, "%8.3f", point_tmp.z);
			strncpy(line + PDBZIndex, coord_tmp,  PDBCoordLength);
		}
		fprintf(fh_out, "%s", line);
	}
	free(coord_tmp);
	fclose(fh_in);
	fclose(fh_out);
	
	return;
}

/*
 * Copy the MOL2 file `infile` to `outfile`, replacing the coordinates of
 * every line of the ATOM section by their transformed values: the site
 * center is first moved to the origin, then `transfo` is applied. Comment
 * lines describing the transformation are written at the top of the output.
 *
 * When `outfile` is NULL the output goes to the current directory under the
 * base name of `infile`, with EXT_MOL2 replaced by EXT_MOL2_ALIGN (or with
 * EXT_MOL2_ALIGN appended when the name does not end with EXT_MOL2).
 *
 * Lines of the ATOM section that do not start with an id, a name and three
 * coordinates are copied unchanged. The program exits when a file cannot be
 * opened or when the output name cannot be allocated.
 */
static void align_mol2_file(char *infile, char *outfile, t_transfo *transfo, t_point *center) {
	int			i;
	int			in_atom_section = MINUSONE;
	char		*pointer, line[MAXLINE];
	char		*file_name;
	char		*generated_name						= NULL;
	size_t		base_len, ext_len;
	FILE		*fh_in = NULL, *fh_out = NULL;
	int			atom_id;
	double		x, y, z;
	/* A token cannot be longer than the line, so MAXLINE bytes always suffice */
	char		atom_type[MAXLINE];
	t_point		point_tmp;
	t_point		center_translation					= {ZERO,ZERO,ZERO};
	t_angle		center_rotation						= {ZERO,ZERO,ZERO};
	t_transfo	center_transfo;
	char		sitealign_version[MAXCHAR]			= SITEALIGN_VERSION;
	char		MOL_EXT[EXT_MOL2_SIZE]				= EXT_MOL2;
	char		MOL_EXT_ALIGN[EXT_MOL2_ALIGN_SIZE]	= EXT_MOL2_ALIGN;

	/* Create transformation struct to place site center at the origin */
	point_set(&center_translation, -center->x, -center->y, -center->z);
	transfo_set(&center_transfo, center_rotation, center_translation);

	if (outfile == NULL) {
		/*
		 * Create a filename for the output
		 * We want to write the file in the current directory (get rid of all dirs in name)
		 * ".mol2", if it exists, is replaced by "_sa_aligned.mol2"
		 * otherwise "_sa_aligned.mol2" is appended to the name.
		 */
		file_name	= strrchr(infile, '/');
		file_name	= (file_name != NULL) ? file_name + ONE : infile;
		base_len	= strlen(file_name);
		ext_len		= strlen(MOL_EXT);
		if (base_len >= ext_len && strcmp(file_name + (base_len - ext_len), MOL_EXT) == ZERO)
			base_len -= ext_len;
		generated_name = (char *)calloc(base_len + strlen(MOL_EXT_ALIGN) + ONE, sizeof(char));
		if (generated_name == NULL) {
			fprintf(stderr, "ERROR:\tCannot allocate memory for the output file name of %s\n", infile);
			exit(ONE);
		}
		memcpy(generated_name, file_name, base_len);
		strcpy(generated_name + base_len, MOL_EXT_ALIGN);
		outfile = generated_name;
	}

	if( ( fh_in  = fopen(infile,  "r") ) == NULL ) {
		fprintf(stderr, "ERROR:\tCannot open %s for reading\n", infile);
		exit(ONE);
	}
	if( ( fh_out = fopen(outfile, "w") ) == NULL ) {
		fprintf(stderr, "ERROR:\tCannot open %s for writing\n", outfile);
		exit(ONE);
	}

	/* Print comment lines indicating the transformation */
	fprintf(fh_out, "#\tModified by SiteAlign Version %s\n", sitealign_version);
	fprintf(fh_out, "#\tSite center: ");
	point_print(fh_out, center);
	fprintf(fh_out, "#\tTranslation : ");
	point_print(fh_out, &transfo->translation);
	fprintf(fh_out, "#\tRotation angle : ");
	angle_print(fh_out, &transfo->rotation_angle);
	fprintf(fh_out, "\n");

	while(fgets(line, MAXLINE, fh_in) != NULL) {
		if ( strncmp(line, "@<TRIPOS>ATOM", THIRTEEN) == ZERO ) {
			fprintf(fh_out, "%s", line);
			in_atom_section = TRUE;
		}
		else if ( in_atom_section == TRUE && strncmp(line, "@<TRIPOS>", EIGHT) == ZERO ) {
			fprintf(fh_out, "%s", line);
			in_atom_section = FALSE;
		}
		else if ( in_atom_section == TRUE ) {
			/* Blank or incomplete lines are passed through untouched */
			if ( sscanf(line, "%d %s %lf %lf %lf", &atom_id, atom_type, &x, &y, &z) != FIVE ) {
				fprintf(fh_out, "%s", line);
				continue;
			}
			/* Get the atom coordinates and apply center + rotation + translation */
			point_set(&point_tmp, x, y, z);
			point_transform(&point_tmp, &center_transfo);
			point_transform(&point_tmp, transfo);
			/* Print two first fields -> atom number and atom code */
			fprintf(fh_out, "%7d %-8s", atom_id, atom_type);
			/* Print the coordinates */
			fprintf(fh_out, " %9.4f %9.4f %9.4f", point_tmp.x, point_tmp.y, point_tmp.z);
			/* Skip the five fields already printed; never step past the terminator */
			pointer	= line;
			for(i=ZERO;i<FIVE;i++) {
				while( *pointer != '\0' && isspace((unsigned char)*pointer) ) pointer++;
				while( *pointer != '\0' && !isspace((unsigned char)*pointer) ) pointer++;
			}
			/* Print the rest */
			fprintf(fh_out, "%s", pointer);
		}
		else {
			fprintf(fh_out, "%s", line);
		}
	}

	/* Close filehandles and exit */
	fclose(fh_in);
	fclose(fh_out);
	/* Only the name built here is owned by this function */
	free(generated_name);
	return;
}
