// hierarchical.c
// Evan Lord
// Created: August 16, 2007
// Last Modified: August 16, 2007


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <mcheck.h>
#include "constants.h"
#include "struct_def.h"


// Forward declarations.  Cluster, ComparisonFunction, StoreResults and Violate
// are defined further down in this file; the remaining functions are not
// defined in the portion of the file shown here (presumably they live in
// other translation units -- confirm against the build).
void Cluster(struct DATA *, float);
int ComparisonFunction(const void *, const void *);
void CreateSuperPoints(struct DATA *);
float Distance(float *, float *, int);
void MLClosure(struct DATA *);
void Normalize(struct DATA *);
void ReadConstraints(char *, struct DATA *);
void ReadData(char *, char *, struct DATA *);
void ReadNames(char *, struct DATA *);
void StoreResults(struct DATA *);
int Violate(struct SUPER_POINT *, struct SUPER_POINT *, struct DATA *);


// Pair distances for the current clustering pass.  Kept at file scope because
// ComparisonFunction (the qsort callback) has no other way to reach the values
// it orders by; Cluster allocates and fills it.
float * distance_array;


/*
 * Program entry point.
 *
 * Usage: <program> <names file> <data file> <constraints file> [<gamma>]
 *
 * Reads the three input files, normalizes the data, closes the must-link
 * constraints, runs the clustering and writes the dendrogram.  gamma is
 * optional and defaults to 0; when given it must be a complete decimal
 * floating-point literal (e.g. "3", "0.25", "1e-2"), otherwise the program
 * reports the problem and exits with status 1.
 *
 * Returns 0 on success; every failure path terminates through exit(1).
 */
int main(int argc, char * argv[])
{
  struct DATA * data;

  float flt_gamma;

  char * chr_end;

  // check for correct number of arguments
  if((argc != 4) && (argc != 5)){
    fprintf(stderr, "\nUsage:\t%s <names file> <data file> <constraints file> <gamma>\n\n", argv[0]);
    exit(1);      
  }

  flt_gamma = 0.0;
  if(argc == 5){
    // strtof handles signs, decimal points and exponents; the end pointer
    // tells us whether the whole argument was consumed
    flt_gamma = strtof(argv[4], &chr_end);
    if((chr_end == argv[4]) || (*chr_end != '\0')){
      fprintf(stderr, "\nInvalid gamma value: %s\n\n", argv[4]);
      exit(1);
    }
  }
  
  // allocate memory for the DATA struct
  if((data = (struct DATA *)malloc(sizeof(struct DATA))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }

  data->int_algorithm_version = -1;

  // call function to read names file
  ReadNames(argv[1], data);   

  // call function to read data file
  ReadData(argv[2], data->str_names, data);
    
  // call function to normalize the data
  Normalize(data);

  // call function to read constraints file
  ReadConstraints(argv[3], data);

  // perform transitive closure on ML constraints
  MLClosure(data);

  printf("\nHierarchical Agglomerative Clustering\nGamma: %f\n", flt_gamma);

  Cluster(data, flt_gamma);

  StoreResults(data);

  printf("Kmin: %d\nKmax: %d\n\n", data->Kmin, data->Kmax);

  return 0;
}

/*
 * Cluster
 *
 * Agglomerative merge loop over the linked list of super points that starts
 * at data->super_point_head.  Each pass:
 *   1. records every pair of remaining super points together with a distance,
 *   2. sorts the pair indices by distance (qsort + ComparisonFunction, which
 *      reads the file-scope distance_array),
 *   3. picks the closest pair for which Violate() returns 0,
 *   4. appends a row to data->dendrogram giving the super point number of
 *      every data point after the merge, and
 *   5. splices the second super point's members and centroid into the first
 *      and unlinks/frees the second.
 *
 * The loop stops when one cluster is left, when Violate() rejects every
 * pair, or when the list holds fewer than two super points.  On return
 * data->Kmin holds the final cluster count and the number of Distance()
 * calls has been printed.
 *
 * data  - clustering state; data->Kmax is taken as the number of super
 *         points currently in the list.  Row 0 of data->dendrogram is read
 *         but never written here, so it is presumably filled in before this
 *         function is called -- confirm against the caller.
 * gamma - pruning threshold: for a pair that does not involve the head super
 *         point, if the absolute difference of the two points' distances to
 *         the head exceeds gamma, Distance() is skipped and the pair is
 *         given the placeholder distance gamma + 1.
 *
 * Any allocation failure prints a message and terminates with exit(1).
 */
void Cluster(struct DATA * data, float gamma)
{
  int ** temp;
  int int_rows_alloc;
  int int_iteration;
  int int_loop_count;
  int int_outer_loop;
  int int_inner_loop;
  int int_num_clusters;
  int int_num_distances;
  int int_pair_index;
  int int_num_dist_calc;

  int * index_array;

  // one entry per candidate pair; [0] and [1] are the two super points
  struct SUPER_POINT * (* sp_pair_array)[2];

  struct SUPER_POINT * point_one;
  struct SUPER_POINT * point_two;
  struct SUPER_POINT * point_to_merge_one;
  struct SUPER_POINT * point_to_merge_two;
  struct SUPER_POINT * prev_point;
  struct SUPER_POINT * next_point;

  struct DATA_POINT * point_one_tail;
  struct DATA_POINT * point_two_head;
  struct DATA_POINT * point_two_tail;

  float distance;

  int_iteration = 0;
  int_rows_alloc = 1;

  // make room for the first ten dendrogram rows
  if((temp = (int **)realloc(data->dendrogram, ((int_rows_alloc + 9) * sizeof(int *)))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  else{
    int_rows_alloc = int_rows_alloc + 9;
    data->dendrogram = temp;
  }

  int_num_clusters = data->Kmax;
  int_num_dist_calc = 0;

  while(1){

    point_one = data->super_point_head;

    // fewer than two super points: there is nothing to pair up, and looping
    // again could never change that
    if((point_one == NULL) || (point_one->pNext == NULL)){
      break;
    }

    // number of unordered pairs: 0 + 1 + ... + (int_num_clusters - 1)
    int_num_distances = 0;
    for(int_loop_count = 0; int_loop_count < int_num_clusters; int_loop_count++){
      int_num_distances = int_num_distances + int_loop_count;
    }

    // per-pass work buffers; all three are released before the pass ends
    index_array = (int *)malloc(int_num_distances * sizeof(int));
    distance_array = (float *)malloc(int_num_distances * sizeof(float));
    sp_pair_array = (struct SUPER_POINT * (*)[2])malloc(int_num_distances * sizeof(*sp_pair_array));

    if((index_array == NULL) || (distance_array == NULL) || (sp_pair_array == NULL)){
      fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
      exit(1);
    }

    for(int_loop_count = 0; int_loop_count < int_num_distances; int_loop_count++){
      sp_pair_array[int_loop_count][0] = NULL;
      sp_pair_array[int_loop_count][1] = NULL;
      index_array[int_loop_count] = int_loop_count;
    }

    // pairs (head, k): always computed, stored at indices 0 .. int_num_clusters - 2
    point_two = point_one->pNext;
    for(int_loop_count = 0; int_loop_count < int_num_clusters - 1; int_loop_count++){
      sp_pair_array[int_loop_count][0] = point_one;
      sp_pair_array[int_loop_count][1] = point_two;
      distance_array[int_loop_count] = Distance(data->super_point_centroids[point_one->int_super_point_num], data->super_point_centroids[point_two->int_super_point_num], data->int_dimensions);
      int_num_dist_calc++;
      point_two = point_two->pNext;
    }

    // remaining pairs; int_loop_count keeps counting from where the head
    // pairs stopped.  int_outer_loop / int_inner_loop are the indices of the
    // (head, point_one) and (head, point_two) distances stored above.
    point_one = point_one->pNext;
    int_outer_loop = 0;
    while(point_one != NULL){
      point_two = point_one->pNext;
      int_inner_loop = int_outer_loop + 1;
      while(point_two != NULL){
        sp_pair_array[int_loop_count][0] = point_one;
        sp_pair_array[int_loop_count][1] = point_two;

        // gamma constraint: floating-point absolute difference (integer
        // abs() would truncate the fractional part before the comparison)
        distance = distance_array[int_outer_loop] - distance_array[int_inner_loop];
        if(distance < 0){
          distance = -distance;
        }

        if(distance > gamma){
          distance_array[int_loop_count] = gamma + 1;
        }
        else{
          distance_array[int_loop_count] = Distance(data->super_point_centroids[point_one->int_super_point_num], data->super_point_centroids[point_two->int_super_point_num], data->int_dimensions);
          int_num_dist_calc++;
        }

        int_loop_count++;
        point_two = point_two->pNext;
        int_inner_loop++;
      }
      point_one = point_one->pNext;
      int_outer_loop++;
    }

    // order the pair indices by ascending distance
    qsort(index_array, int_num_distances, sizeof(int), ComparisonFunction);

    // first pair, closest first, that Violate() accepts
    int_pair_index = 0;
    while((int_pair_index < int_num_distances) && (Violate(sp_pair_array[index_array[int_pair_index]][0], sp_pair_array[index_array[int_pair_index]][1], data))){
      int_pair_index++;
    }

    if(int_pair_index < int_num_distances){
      point_to_merge_one = sp_pair_array[index_array[int_pair_index]][0];
      point_to_merge_two = sp_pair_array[index_array[int_pair_index]][1];
    }
    else{
      point_to_merge_one = NULL;
      point_to_merge_two = NULL;
    }

    // the chosen pair has been copied out, so the work buffers can go now
    free(index_array);
    index_array = NULL;
    free(distance_array);
    distance_array = NULL;
    free(sp_pair_array);
    sp_pair_array = NULL;

    // every pair was rejected: no further merge is possible
    if(point_to_merge_one == NULL){
      break;
    }

    int_iteration++;
    int_num_clusters--;

    // grow the dendrogram row table ten rows at a time
    if(int_iteration == int_rows_alloc){
      if((temp = (int **)realloc(data->dendrogram, ((int_rows_alloc + 10) * sizeof(int *)))) == NULL){
        fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
        exit(1);
      }
      else{
        int_rows_alloc = int_rows_alloc + 10;
        data->dendrogram = temp;
      }
    }

    if((data->dendrogram[int_iteration] = (int *)malloc(data->int_num_data_points * sizeof(int))) == NULL){
      fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
      exit(1);
    }

    // new row = previous row with the absorbed super point renumbered
    for(int_loop_count = 0; int_loop_count < data->int_num_data_points; int_loop_count++){

      if(data->dendrogram[int_iteration - 1][int_loop_count] == point_to_merge_two->int_super_point_num){
        data->dendrogram[int_iteration][int_loop_count] = point_to_merge_one->int_super_point_num;
      }
      else{
        data->dendrogram[int_iteration][int_loop_count] = data->dendrogram[int_iteration - 1][int_loop_count];
      }
    }

    // merge the two "super points"
    prev_point = point_to_merge_two->pPrev;
    next_point = point_to_merge_two->pNext;

    point_one_tail = point_to_merge_one->tail;
    point_two_head = point_to_merge_two->head;
    point_two_tail = point_to_merge_two->tail;

    // append the second member list to the first
    point_one_tail->pSetNext = point_two_head;
    point_two_head->pSetPrev = point_one_tail;

    point_to_merge_one->tail = point_two_tail;
    point_to_merge_two->head = NULL;
    point_to_merge_two->tail = NULL;

    // weighted average of the two centroids; the absorbed centroid is
    // overwritten with -1 in every dimension
    for(int_loop_count = 0; int_loop_count < data->int_dimensions; int_loop_count++){
      data->super_point_centroids[point_to_merge_one->int_super_point_num][int_loop_count] = (point_to_merge_one->int_weight * data->super_point_centroids[point_to_merge_one->int_super_point_num][int_loop_count] + point_to_merge_two->int_weight * data->super_point_centroids[point_to_merge_two->int_super_point_num][int_loop_count]) / (point_to_merge_one->int_weight + point_to_merge_two->int_weight);
      data->super_point_centroids[point_to_merge_two->int_super_point_num][int_loop_count] = -1;
    }
    point_to_merge_one->int_weight = point_to_merge_one->int_weight + point_to_merge_two->int_weight;

    // remove the old "super point"; it always follows point_to_merge_one in
    // the list, so prev_point is never NULL here
    prev_point->pNext = next_point;
    if(next_point != NULL){
      next_point->pPrev = prev_point;
    }
    free(point_to_merge_two);
    point_to_merge_two = NULL;

    if(int_num_clusters == 1){
      break;
    }
  }

  data->Kmin = int_num_clusters;
  printf("Total distance calculations: %d\n", int_num_dist_calc);
}


/*
 * qsort callback.  Both arguments point at int indices into the file-scope
 * distance_array; the indices are ordered by the distance they refer to.
 *
 * Returns -1 if the first distance is smaller, 0 if the two compare equal,
 * and 1 in every other case.
 */
int ComparisonFunction(const void * index_one, const void * index_two)
{
  const float first_distance = distance_array[*(int *)index_one];
  const float second_distance = distance_array[*(int *)index_two];

  if(first_distance < second_distance){
    return -1;
  }

  if(first_distance == second_distance){
    return 0;
  }

  return 1;
}


/*
 * Writes the dendrogram to "dendrogram.csv" in the current directory.
 *
 * The first line lists the data point indices 0 .. int_num_data_points - 1.
 * It is followed by dendrogram rows 0 .. (Kmax - Kmin), one line per row,
 * values separated by commas.  If the file cannot be opened for writing a
 * message is printed to stderr and the program exits with status 1.
 */
void StoreResults(struct DATA * data)
{
  FILE * out;

  int row;
  int col;

  out = fopen("dendrogram.csv", "w");
  if(out == NULL){
    fprintf(stderr, "\nThe dendrogram file could not be created.  ");
    fprintf(stderr, "The program will now terminate.\n\n");
    exit(1);
  }

  // header line: one column label per data point
  col = 0;
  while(col < data->int_num_data_points){
    fprintf(out, "%d", col);
    // comma between columns, newline after the last one
    fputs((col < data->int_num_data_points - 1) ? "," : "\n", out);
    col++;
  }

  // one line per dendrogram row
  row = 0;
  while(row <= (data->Kmax - data->Kmin)){
    col = 0;
    while(col < data->int_num_data_points){
      fprintf(out, "%d", data->dendrogram[row][col]);
      fputs((col < data->int_num_data_points - 1) ? "," : "\n", out);
      col++;
    }
    row++;
  }

  fclose(out);
}

/*
 * Checks whether two super points are tied together by an entry of
 * data->CL_constraints_array.
 *
 * For every member of super_point_one, the constraint list stored at
 * data->CL_constraint_involvement[member index][HEAD] is walked.  Each list
 * node names a row of CL_constraints_array and one of its two columns; the
 * value in the other column of that row is compared with the index of every
 * member of super_point_two.
 *
 * Returns 1 on the first match, 0 if there is none.
 */
int Violate(struct SUPER_POINT * super_point_one, struct SUPER_POINT * super_point_two, struct DATA * data)
{
  struct DATA_POINT * member_one;
  struct DATA_POINT * member_two;
  struct CONSTRAINT * link;

  for(member_one = super_point_one->head; member_one != NULL; member_one = member_one->pSetNext){
    for(link = data->CL_constraint_involvement[member_one->int_index][HEAD]; link != NULL; link = link->pNext){
      for(member_two = super_point_two->head; member_two != NULL; member_two = member_two->pSetNext){
        // (col + 1) % 2 selects the column this list node does not refer to
        if(member_two->int_index == data->CL_constraints_array[link->int_constraint_index][(link->int_constraint_col + 1) % 2]){
          return 1;
        }
      }
    }
  }

  return 0;
}
