/*
 *
 * SiteAlign-4.0
 * Created: 20081222 Jerome Hert Based on SiteAlign-3.6 written by Jean-Sebastien Surgand and Nicolas Foata
 *  
 */

#include "SiteAlign.h"
#define OPTIONS "?Vvn:r:t:p:q:i:o:e:l:ad"

/*
 * +--------------------------------------------------------------------------+
 * | MAIN                                                                     |
 * +--------------------------------------------------------------------------+
 */
/*
 * Copy a path given on the command line into a fixed-size buffer.
 *
 * dst      : destination buffer
 * dst_size : total size of dst in bytes (including the terminator)
 * src      : NUL-terminated path taken from the command line
 *
 * Exits with status 1 when src does not fit, instead of overflowing dst
 * or silently truncating the path.
 */
static void copy_path_argument(char *dst, size_t dst_size, const char *src)
{
	if (strlen(src) >= dst_size) {
		fprintf(stderr, "Error : file name too long (max %lu characters): %s\n",
			(unsigned long)(dst_size - 1), src);
		exit(1);
	}
	strcpy(dst, src);
}

/*
 * Program entry point.
 *
 * Parses the command line, checks the license, reads the query site from
 * the first non-comment line of the input, then reads every following
 * non-comment line as a target site, scores/aligns it against the query
 * and prints the result on stdout.
 *
 * Exits with status 0 on success (and after --help / --version) and with
 * status 1 on errors detected here (bad file, invalid license, empty
 * input, out of memory, unknown option reaching the default case).
 */
int main(int argc, char **argv)
{
	/*	Variables	*/
	int		verbosity			= FALSE;
	int		output_details		= FALSE;
	int		align_protein_files	= FALSE;
	int		nb_passes			= ALIGN_NB_PASSES;
	int		have_query_line		= FALSE;
	char	file_in[MAXCHAR]	= "\0";
	char	file_out[MAXCHAR]	= "\0";
	char	file_err[MAXCHAR]	= "\0";
	char	file_lic[MAXCHAR]	= "\0";
	char	input_line[MAXLINE] = "\0";

	t_explore	explore_params;

	t_protein	*protein_query		= NULL;
	t_protein	*protein_target		= NULL;

	t_alignment	*alignment_query	= NULL;
	t_alignment	*alignment_target	= NULL;

	/* GETOPT Variables */
	int			optc;
	static struct option long_options[] = {
		{"help",				no_argument,		0,	'?'},
		{"verbose",				no_argument,		0,	'v'},
		{"version",				no_argument,		0,	'V'},
		{"nb_passes",			required_argument,	0,	'n'},
		{"nrot",				required_argument,	0,	'p'},
		{"ntrans",				required_argument,	0,	'q'},
		{"irot",				required_argument,	0,	'r'},
		{"itrans",				required_argument,	0,	't'},
		{"infile",				required_argument,	0,	'i'},
		{"outfile",				required_argument,	0,	'o'},
		{"errfile",				required_argument,	0,	'e'},
		{"licensefile",			required_argument,	0,	'l'},
		{"output_details",		no_argument,    	0,	'd'},
		{"align_protein_files",	no_argument,    	0,	'a'},
		{0, 0, 0, 0}
	};

	explore_initialize(&explore_params);

	/*
	 * PARSING ARGUMENTS
	 * The last argument of getopt_long is an output slot for the index of
	 * the matched long option. It must not alias optind (the parser's own
	 * scan position); the index is not needed here, so NULL is passed.
	 */
	while ( ( optc = getopt_long(argc, argv, OPTIONS, long_options, NULL) ) != MINUSONE ) {
		switch (optc)
		{
			case '?':
				/* Note: getopt_long also returns '?' for unrecognized options */
				print_help();
				exit(0);
			case 'V':
				print_version();
				exit(0);
			case 'v':
				verbosity = TRUE;
				break;
			case 'p':
				explore_set_nrot(&explore_params, atoi(optarg));
				break;
			case 'q':
				explore_set_ntrans(&explore_params, atoi(optarg));
				break;
			case 'r':
				explore_set_irot(&explore_params, atof(optarg));
				break;
			case 't':
				explore_set_itrans(&explore_params, atof(optarg));
				break;
			case 'i':
				copy_path_argument(file_in, sizeof file_in, optarg);
				if( freopen(file_in, "r", stdin) == NULL ) {
					fprintf(stderr, "Error : unknow file %s\n", file_in);
					exit(1);
				}
				break;
			case 'o':
				copy_path_argument(file_out, sizeof file_out, optarg);
				if( freopen(file_out, "w", stdout) == NULL ) {
					fprintf(stderr, "Error : unknow file %s\n", file_out);
					exit(1);
				}
				break;
			case 'e':
				copy_path_argument(file_err, sizeof file_err, optarg);
				if( freopen(file_err, "w", stderr) == NULL ) {
					fprintf(stderr, "Error : unknow file %s\n", file_err);
					exit(1);
				}
				break;
			case 'l':
				copy_path_argument(file_lic, sizeof file_lic, optarg);
				break;
			case 'n':
				nb_passes		= atoi(optarg);
				break;
			case 'd':
				output_details	= TRUE;
				break;
			case 'a':
				align_protein_files	= TRUE;
				break;
			default :
				fprintf(stderr, "ERROR: Unknown option %c, type --help for help.\n", optc);
				print_help();
				/* An unknown option is an error: report failure to the caller */
				exit(1);
		}
	}

/*
 * +--------------------------------------------------------------------------+
 * | READ INPUT FILE                                                          |
 * +--------------------------------------------------------------------------+
 */

	/* Print Welcome Banner */
	if (verbosity)
		print_credentials(stderr);

	/* Check license (a non-zero return is treated as an invalid license) */
	if ( check_license(stderr, file_lic, verbosity) ) {
		fprintf(stderr, "ERROR:\tInvalid license file\n");
		exit(1);
	}

	/* Say if reading from standard input */
	if (file_in[ZERO] == '\0' && verbosity)
		fprintf(stderr, "WARNING:\tReading input file from STDIN\n");

/*
 * +--------------------------------------------------------------------------+
 * | READ QUERY FILES                                                         |
 * +--------------------------------------------------------------------------+
 */

	/*
	 * Read the first non-comment line. Looping on the result of fgets
	 * (rather than on feof) stops cleanly at end of file and on read
	 * errors, and never mistakes a trailing comment line for the query.
	 */
	while ( fgets(input_line, MAXLINE, stdin) != NULL ) {
		if ( input_is_comment(input_line) == TRUE )
			continue;
		have_query_line = TRUE;
		break;
	}

	/* Just check if the file is not empty */
	if ( have_query_line == FALSE || strlen(input_line) == ZERO ) {
		fprintf(stderr, "ERROR:\tInput file empty.\n");
		exit(1);
	}

	/* Allocate memory for the query and the alignment structures */
	protein_query	= (t_protein *)calloc(ONE, sizeof(t_protein));
	alignment_query	= (t_alignment *)calloc(ONE, sizeof(t_alignment));
	if (protein_query == NULL || alignment_query == NULL) {
		fprintf(stderr, "ERROR:\tOut of memory.\n");
		exit(1);
	}
	protein_initialize(protein_query);
	alignment_initialize(alignment_query);

	/* Process input line*/
	input_process_line(input_line, protein_query);
	if (DEBUG_INPUT)
		input_print(stderr, protein_query);

	/* Read residue list */
	input_read_residues(protein_query);
	if (DEBUG_INPUT) {
		fprintf(stderr, "WARNING:\tPrinting Residue file \"%s\"\n", protein_query->file_residues);
		input_print_residues(stderr, protein_query);
	}

	/* Read Protein file */
	input_read_protein(protein_query);

	/* Calculate center of CAs for protein */
	protein_prepare(protein_query);
	protein_centerofca(protein_query);
	if (DEBUG_INPUT)
		protein_print(stderr, protein_query);
	if (verbosity) {
		fprintf(stderr, "WARNING:\tRead file %s with residues in file %s. %d residues in site G = [%8.3f %8.3f %8.3f]\n",
			protein_query->file_protein,
			protein_query->file_residues,
			protein_query->size,
			protein_query->centerofca.x, protein_query->centerofca.y, protein_query->centerofca.z
		);
	}

	/* Center all coordinates to origin (But only if Nrot or Ntrans are not equal to 0) */
	if (explore_params.rotation_nb != ZERO || explore_params.translation_nb != ZERO)
		protein_to_origin(protein_query);

	/* Get the CAs CBs and calculate center of mass */
	if (protein_prepare(protein_query) != ZERO)
		fprintf(stderr, "WARNING:\tThere were missing CAa and CBs in file \"%s\".\n", protein_query->file_protein);

	/* Compute mapping of the query protein */
	map_compute( alignment_query->map, protein_query);

	/* Print the results */
	print_credentials(stdout);
	fprintf(stdout, "Reference:\n");
	fprintf(stdout, " - Filename\t:\t%s\n", protein_query->file_protein);
	fprintf(stdout, " - Residue count\t:\t%d\n", protein_query->size);
	fprintf(stdout, " - Site center\t:\t");
	point_print( stdout, &protein_query->centerofca );
	fprintf(stdout, "\n");

	/* Print aligned query protein file */
	if (align_protein_files)
		align_protein_file(protein_query->file_protein, NULL, &alignment_query->transfo, &protein_query->centerofca);

/*
 * +--------------------------------------------------------------------------+
 * | ITERATIVELY READ ALL TARGET FILES, ALIGN AND PRINT OUTPUT                |
 * +--------------------------------------------------------------------------+
 */

	/* One target per non-comment line; stop at end of file or read error */
	while ( fgets(input_line, MAXLINE, stdin) != NULL ) {

		/* Skip comment lines */
		if ( input_is_comment(input_line) == TRUE )
			continue;

		/* Allocate memory for the protein and the alignment structure */
		protein_target		= (t_protein *)calloc(ONE, sizeof(t_protein));
		alignment_target	= (t_alignment *)calloc(ONE, sizeof(t_alignment));
		if (protein_target == NULL || alignment_target == NULL) {
			fprintf(stderr, "ERROR:\tOut of memory.\n");
			exit(1);
		}
		protein_initialize(protein_target);
		alignment_initialize(alignment_target);

		/* Process input line*/
		input_process_line(input_line, protein_target);
		if (DEBUG_INPUT)
			input_print(stderr, protein_target);

		/* Read residue list */
		input_read_residues(protein_target);
		if (DEBUG_INPUT) {
			fprintf(stderr, "WARNING:\tPrinting Residue file \"%s\"\n", protein_target->file_residues);
			input_print_residues(stderr, protein_target);
		}

		/* Read Protein file */
		input_read_protein(protein_target);

		/* Calculate center of Calpha for protein*/
		protein_prepare(protein_target);
		protein_centerofca(protein_target);
		if (DEBUG_INPUT)
			protein_print(stderr, protein_target);
		if (verbosity) {
			fprintf(stderr, "WARNING:\tRead file %s with residues in file %s. %d residues in site G = [%8.3f %8.3f %8.3f]\n",
				protein_target->file_protein,
				protein_target->file_residues,
				protein_target->size,
				protein_target->centerofca.x, protein_target->centerofca.y, protein_target->centerofca.z
			);
		}

		/* Center all coordinates to origin (But only if Nrot or Ntrans are not equal to 0) */
		if (explore_params.rotation_nb != ZERO || explore_params.translation_nb != ZERO)
			protein_to_origin(protein_target);

		/* Get the CAs CBs and calculate center of mass; the warning names the target file */
		if (protein_prepare(protein_target) != ZERO )
			fprintf(stderr, "WARNING:\tThere were missing CAs and CBs in file \"%s\".\n", protein_target->file_protein);

		/*
		 * Align and score the target against the query
		 * If Nrot = Ntrans = 0, skip the alignment phase and just score
		 * the current geometry of the proteins
		 */
		if (explore_params.rotation_nb == ZERO && explore_params.translation_nb == ZERO) {
			map_compute( alignment_target->map, protein_target);
			map_compare(&alignment_target->score, alignment_query->map, alignment_target->map, ZERO);
			calculate_d3_d4(protein_query, protein_target, alignment_query, alignment_target);
		}
		else {
			align(protein_query, protein_target, alignment_query, alignment_target, &explore_params, nb_passes, verbosity);
		}

		/* Print the results */
		output_print_score(stdout, protein_target, alignment_target);
		if (output_details) {
			output_print_transfo(	stdout, &protein_target->centerofca, &alignment_target->transfo);
			output_print_dist_local(stdout, protein_query, alignment_query, protein_target, alignment_target);
			output_print_map(		stdout, protein_query, alignment_query->map, protein_target, alignment_target->map);
		}

		/* Print aligned protein file */
		if (align_protein_files)
			align_protein_file(protein_target->file_protein, NULL, &alignment_target->transfo, &protein_target->centerofca);

		/* Free t_protein and t_alignment pointers (both are non-NULL here) */
		protein_free(protein_target);
		free(protein_target);
		free(alignment_target);
		protein_target		= NULL;
		alignment_target	= NULL;
	}

/*
 * +--------------------------------------------------------------------------+
 * | CLEAN EXIT -> FREE EVERYTHING                                            |
 * +--------------------------------------------------------------------------+
 */

	protein_free(protein_query);
	free(protein_query);
	free(alignment_query);

/*
 * +--------------------------------------------------------------------------+
 * | EXIT                                                                     |
 * +--------------------------------------------------------------------------+
 */
	exit(ZERO);
}
 
/*
 * Write the SiteAlign banner to the stream fh: a ruled box containing the
 * version string, the release date, the laboratory affiliation and its URL.
 */
void print_credentials(FILE *fh) {
	static const char banner_rule[] =
		"-------------------------------------------------------------\n";
	char version_text[MAXCHAR] = SITEALIGN_VERSION;

	fputs(banner_rule, fh);
	fprintf(fh, "               SiteAlign Version %s\n", version_text);
	fputs(
		"\n"
		"                       Jan. 2009\n"
		"\n"
		"               Bioinformatics of the Drug\n"
		"                    CNRS UMR7175-LC1\n"
		"                    F-67410 Illkirch\n"
		"            http://bioinfo-pharma.u-strasbg.fr\n",
		fh);
	fputs(banner_rule, fh);
}

/*
 * Print the one-line version string ("SiteAlign Version <version>") on stderr.
 */
void print_version(void) {
	char version_text[MAXCHAR] = SITEALIGN_VERSION;

	fprintf(stderr, "%s %s\n", "SiteAlign Version", version_text);
}

/*
 * Print the full usage message on stderr: version line, usage synopsis,
 * option list (with the compiled-in default values), file format notes,
 * a description of the output columns, authors and copyright.
 */
void print_help(void) {
	char			version_text[MAXCHAR]	= SITEALIGN_VERSION;
	const int		default_passes			= ALIGN_NB_PASSES;
	const int		default_nrot			= SITEALIGN_NROTATION_DEFAULT;
	const int		default_ntrans			= SITEALIGN_NTRANSLATION_DEFAULT;
	const double	default_irot			= SITEALIGN_IROTATION_DEFAULT;
	const double	default_itrans			= SITEALIGN_ITRANSLATION_DEFAULT;

	/* Header, synopsis and file-related options */
	fprintf(stderr,
		"SiteAlign Version %s\n"
		"\n"
		"Usage:\tSiteAlign [options] --infile [file] --outfile [file]\n"
		"\n"
		"Description:\n"
		"\tSiteAlign aligns the site of the first line of the input\n"
		"\tfile (as defined by a PDB/SYBMOL2 file and a file listing\n"
		"\tthe chains and numbers of the residues constituting the site)\n"
		"\tto all the other proteins listed in the input file.\n"
		"\n"
		"Options:\n"
		"\t--infile|i  [file]      : path and name of the input file\t[DEFAULT is stdin]\n"
		"\t--outfile|o [file]      : path and name of the output file\t[DEFAULT is stdout]\n"
		"\t--errfile|e [file]      : path and name of the error/log file\t[DEFAULT is stderr]\n"
		"\t--licensefile|l [file]  : path and name of the license file\t[DEFAULT is $HOME/.siteAlign/licence.txt]\n"
		"\n",
		version_text);

	/* Tunable search parameters, shown with their default values */
	fprintf(stderr,
		"\t--nb_passes|n [int]     : Number of passes in the alignment phase\t[DEFAULT is %d]\n"
		"\t--nrot|p    [int]       : defines the number of rotation steps\t[DEFAULT is %d]\n"
		"\t--ntrans|q  [int]       : defines the number of translation steps\t[DEFAULT is %d]\n"
		"\t--irot|r    [double]    : defines the total rotation intensity\t[DEFAULT is %g]\n"
		"\t--itrans|t  [double]    : defines the total translation intensity\t[DEFAULT is %g]\n",
		default_passes, default_nrot, default_ntrans, default_irot, default_itrans);

	/* Flags, file formats, output description and credits (no format arguments) */
	fputs(
		"\n"
		"\t--output_details|d      : print details in the output (matched residues and FP)\t[DEFAULT is 0]\n"
		"\t--align_protein_files|a : write the aligned proteins in a pdb file (in the current directory) \t[DEFAULT is 0]\n"
		"\t--verbose|v             : sets verbosity level to 1\t\t[DEFAULT is 0]\n"
		"\t--version|V             : prints this version\n"
		"\t--help|?                : prints this message\n"
		"\n"
		"File formats:\n"
		"\to Input file (SiteAlign can read PDB and SYBMOL2 formats--the substructure section in the SYBMOL2 file is mandatory)\n"
		"\tquery.mol2 query_list_of_residues.txt\n"
		"\tcandidate1.pdb candidate1_list_of_residues.txt\n"
		"\tcandidate2.pdb candidate2_list_of_residues.txt ...\n"
		"\t\n"
		"\to Residue list file (blank lines and lines starting with \'#\' or spaces are ignored)\n"
		"\tChain Number\n"
		"\tA 45 (lists the residue # 45 of chain A)\n"
		"\t- 124 (list the residue # 124 of a unamed chain--space in the pdb file)\n"
		"\n"
		"\to Output:\n"
		"\td1 is a global measure of binding site similarity (normalised with N). Sites are considered similar if d1 < 0.6\n"
		"\td2 detects rather loacal similarity (and is normalized using N2). Sites are considered similar if d2 < 0.2\n"
		"\td3 is identical to d2, but it uses the distance to the center of the rotamer specific side chain atom rather than the CB\n"
		"\td4 is the RMSD of the distances between the rotamer specific side chain atoms of the aligned residues\n"
		"\tN  is the number of triangles, non-null for either sites\n"
		"\tN2 is the number of triangles, non-null for both sites \n"
		"\tN3 is the number of triangles, non-null for both sites and leading to local distances <= 0.2\n"
		"\tN4 is the number of triangles, non-null for both sites and polar\n"
		"\tSee http://dx.doi.org/10.1002/prot.21858 for additional details.\n"
		"\n"
		"Authors:\n"
		"\tJerome Hert, Jean-Sebastien Surgand, Nicolas Foata and Didier Rognan\n"
		"\n"
		"Copyright:\n"
		"\tCopyright (c) 2009 Didier Rognan and Universite de Strasbourg\n"
		"\n",
		stderr);
}
