// shared.c
// Evan Lord
// Created: July 23, 2007
// Last Modified: July 23, 2007


#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <mcheck.h>
#include "constants.h"
#include "struct_def.h"


// Forward declaration of Distance(), defined later in this file.  Arguments
// are, in order: coordinates of the first point, coordinates of the second
// point, and the number of dimensions to compare.
float Distance(float *, float *, int);


// Computes the fraction of data points that disagree with the majority label
// of the cluster they were assigned to.
//
// For each cluster, every data point reachable through the cluster's super
// point list is tallied against the distinct labels.  The most frequent label
// is treated as the cluster's label; all remaining tallied points in that
// cluster count as "bad".  The return value is the total number of bad points
// divided by data->int_num_data_points.
//
// Returns 0 when there are no distinct labels or no data points.  Without
// this guard the tally array would have zero (or negative) length, which is
// undefined for a variable-length array, and the final division would
// produce NaN.

float CalculateError(struct DATA * data)
{
  int int_outer_loop;
  int int_inner_loop;

  int int_num_points;
  int int_max_value;
  int int_total_bad_points;

  struct SUPER_POINT * super_point;
  struct DATA_POINT * data_point;

  const char * point_label;

  // nothing to measure: avoid a zero-length VLA and a 0/0 division
  if(data->int_num_labels <= 0 || data->int_num_data_points <= 0){
    return 0;
  }

  // per-label tally for the cluster currently being examined
  int int_count[data->int_num_labels];

  int_total_bad_points = 0;

  for(int_outer_loop = 0; int_outer_loop < data->int_num_clusters; int_outer_loop++){

    for(int_inner_loop = 0; int_inner_loop < data->int_num_labels; int_inner_loop++){
      int_count[int_inner_loop] = 0;
    }

    // walk every super point in the cluster and every data point in its set
    super_point = data->clusters[int_outer_loop];
    while(super_point != NULL){
      data_point = super_point->head;
      while(data_point != NULL){
        // the point's label does not change while scanning the distinct labels
        point_label = data->labels_array[data_point->int_index];
        for(int_inner_loop = 0; int_inner_loop < data->int_num_labels; int_inner_loop++){
          if(!strcmp(point_label, data->labels_array[data->distinct_labels[int_inner_loop]])){
            int_count[int_inner_loop]++;
          }
        }
        data_point = data_point->pSetNext;
      }
      super_point = super_point->pClusterNext;
    }

    // find the majority tally and the total number of tallied points
    int_max_value = int_count[0];
    int_num_points = int_count[0];
    for(int_inner_loop = 1; int_inner_loop < data->int_num_labels; int_inner_loop++){
      if(int_max_value < int_count[int_inner_loop]){
        int_max_value = int_count[int_inner_loop];
      }
      int_num_points = int_num_points + int_count[int_inner_loop];
    }

    // everything that is not in the majority counts as misassigned
    int_total_bad_points = int_total_bad_points + (int_num_points - int_max_value);
  }

  return (float)int_total_bad_points / (float)data->int_num_data_points;
}


// Recounts the satisfied constraints and stores the totals in
// data->int_num_ML_satisfied and data->int_num_CL_satisfied.
//
// An ML row counts as satisfied when its columns 2 and 3 hold equal values;
// a CL row counts as satisfied when they differ.
// NOTE(review): columns 2 and 3 presumably hold the cluster numbers of the
// two constrained points (ML = must-link, CL = cannot-link) -- confirm
// against the code that fills the constraint arrays.

void CheckConstraintsSatisfied(struct DATA * data)
{
  int row;
  int ml_satisfied = 0;
  int cl_satisfied = 0;

  for(row = 0; row < data->int_num_ML_constraints; row++){
    if(data->ML_constraints_array[row][2] == data->ML_constraints_array[row][3]){
      ml_satisfied = ml_satisfied + 1;
    }
  }

  for(row = 0; row < data->int_num_CL_constraints; row++){
    if(data->CL_constraints_array[row][2] != data->CL_constraints_array[row][3]){
      cl_satisfied = cl_satisfied + 1;
    }
  }

  data->int_num_ML_satisfied = ml_satisfied;
  data->int_num_CL_satisfied = cl_satisfied;
}


// This function creates a linked list of "super points".  Each "super point"
// corresponds to a single data point.  This allows functions to be shared
// among all versions of the K-Means algorithm

void CreateSuperPoints(struct DATA * data)
{
  // pointers for linked list manipulation
  struct DATA_POINT * new_node;
  struct DATA_POINT * cur_node; 
  struct SUPER_POINT * new_super;
  struct SUPER_POINT * cur_super;
 
  int int_outer_loop;
  int int_inner_loop;
  int int_loop_count;            // loop counter
  int int_super_point_count;     // number of "super points"
  
  struct DATA_POINT * set_lists[data->int_num_data_points][2];
  int set_assignment[data->int_num_data_points];

  //struct SUPER_POINT * cluster_ptrs[data->int_num_clusters];
  //struct SUPER_POINT * ML_ptrs_one[data->int_num_ML_constraints];
  //struct SUPER_POINT * ML_ptrs_two[data->int_num_ML_constraints];
  //struct SUPER_POINT * CL_ptrs_one[data->int_num_CL_constraints];
  //struct SUPER_POINT * CL_ptrs_two[data->int_num_CL_constraints];

  //data->clusters = cluster_ptrs;
  //data->ML_constraint_ptrs_one = ML_ptrs_one;
  //data->ML_constraint_ptrs_two = ML_ptrs_two;
  //data->CL_constraint_ptrs_one = CL_ptrs_one;
  //data->CL_constraint_ptrs_two = CL_ptrs_two;
  
  // allocate memory for the array containing head pointers for each cluster
  if((data->clusters = (struct SUPER_POINT **)malloc(data->int_num_clusters * sizeof(struct SUPER_POINT *))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  else{
    for(int_loop_count = 0; int_loop_count < data->int_num_clusters; int_loop_count++){
      if((data->clusters[int_loop_count] = (struct SUPER_POINT *)malloc(sizeof(struct SUPER_POINT))) == NULL){
	fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
	exit(1);
      }
      else{
	// initialize all pointers to NULL 
	data->clusters[int_loop_count] = NULL;
      }
    }
  }
  
  if((data->ML_constraint_ptrs = (struct SUPER_POINT ***)malloc(data->int_num_ML_constraints * sizeof(struct SUPER_POINT **))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  else{
    for(int_outer_loop = 0; int_outer_loop < data->int_num_ML_constraints; int_outer_loop++){
      if((data->ML_constraint_ptrs[int_outer_loop] = (struct SUPER_POINT **)malloc(2 * sizeof(struct SUPER_POINT *))) == NULL){
	fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
	exit(1);
      }
      else{
	for(int_inner_loop = 0; int_inner_loop < 2; int_inner_loop++){
	  if((data->ML_constraint_ptrs[int_outer_loop][int_inner_loop] = (struct SUPER_POINT *)malloc(sizeof(struct SUPER_POINT))) == NULL){
	    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
	    exit(1);
	  }
	}
      }
    }
  }

  if((data->CL_constraint_ptrs = (struct SUPER_POINT ***)malloc(data->int_num_CL_constraints * sizeof(struct SUPER_POINT **))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  else{
    for(int_outer_loop = 0; int_outer_loop < data->int_num_CL_constraints; int_outer_loop++){
      if((data->CL_constraint_ptrs[int_outer_loop] = (struct SUPER_POINT **)malloc(2 * sizeof(struct SUPER_POINT *))) == NULL){
	fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
	exit(1);
      }
      else{
	for(int_inner_loop = 0; int_inner_loop < 2; int_inner_loop++){
	  if((data->CL_constraint_ptrs[int_outer_loop][int_inner_loop] = (struct SUPER_POINT *)malloc(sizeof(struct SUPER_POINT))) == NULL){
	    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
	    exit(1);
	  }
	}
      }
    }
  }

  // create linked list of nodes containing a data point index
  new_node = (struct DATA_POINT *)malloc(sizeof(struct DATA_POINT));
  new_node->int_index = 0;
  new_node->int_cluster = -1;  
  new_node->int_set = 0;
  new_node->pPrev = NULL;
  new_node->pNext = NULL;
  new_node->pClusterPrev = NULL;
  new_node->pClusterNext = NULL;
  new_node->pSetPrev = NULL;
  new_node->pSetNext = NULL;
  data->head_point = new_node;
  set_lists[0][HEAD] = new_node;
  set_lists[0][TAIL] = new_node;
  set_assignment[0] = 0;
  cur_node = new_node;
  new_node = NULL;
  for(int_loop_count = 1; int_loop_count < data->int_num_data_points; int_loop_count++){
    new_node = (struct DATA_POINT *)malloc(sizeof(struct DATA_POINT));
    new_node->int_index = int_loop_count;
    new_node->int_cluster = -1;
    new_node->int_set = int_loop_count;
    new_node->pPrev = cur_node;
    new_node->pNext = NULL;
    new_node->pClusterPrev = NULL;
    new_node->pClusterNext = NULL;
    new_node->pSetPrev = NULL;
    new_node->pSetNext = NULL;
    cur_node->pNext = new_node;
    set_lists[int_loop_count][HEAD] = new_node;
    set_lists[int_loop_count][TAIL] = new_node;
    set_assignment[int_loop_count] = int_loop_count;
    cur_node = new_node;
    new_node = NULL;
  }

  cur_node = data->head_point;

  new_super = (struct SUPER_POINT *)malloc(sizeof(struct SUPER_POINT));
  new_super->int_super_point_num = 0;
  new_super->int_cluster = -1;
  new_super->int_constraint = 0;
  new_super->head = cur_node;
  new_super->int_weight = 1;
  new_super->pPrev = NULL;
  new_super->pNext = NULL;
  new_super->pClusterPrev = NULL;
  new_super->pClusterNext = NULL;
  data->super_point_head = new_super;

  if(data->int_algorithm_version == 2){
    // loop over first two columns of the ML constraint array
    for(int_outer_loop = 0; int_outer_loop < 2; int_outer_loop++){
      // loop over all ML constraints
      for(int_inner_loop = 0; int_inner_loop < data->int_num_ML_constraints; int_inner_loop++){
	// if the point is involved in an ML constraint
	if(data->ML_constraints_array[int_inner_loop][int_outer_loop] == cur_node->int_index){
	  data->ML_constraint_ptrs[int_inner_loop][int_outer_loop] = new_super;
	  new_super->int_constraint = 1;
	}
      }
    }
    // loop over first two columns of the CL constraint array
    for(int_outer_loop = 0; int_outer_loop < 2; int_outer_loop++){
      // loop over all CL constraints
      for(int_inner_loop = 0; int_inner_loop < data->int_num_CL_constraints; int_inner_loop++){
	// if the point is involved in an CL constraint
	if(data->CL_constraints_array[int_inner_loop][int_outer_loop] == cur_node->int_index){
	  data->CL_constraint_ptrs[int_inner_loop][int_outer_loop] = new_super;
	  new_super->int_constraint = 1;
	}
      }
    }
  }

  /*
  if(data->int_algorithm_version == 2){
    // loop over all ML constraints
    for(int_loop_count = 0; int_loop_count < data->int_num_ML_constraints; int_loop_count++){
      // if the point is involved in an ML constraint
      if(data->ML_constraints_array[int_loop_count][0] == cur_node->int_index){
	data->ML_constraint_ptrs_one[int_loop_count] = new_super;
	new_super->int_constraint = 1;
      }
      else if(data->ML_constraints_array[int_loop_count][1] == cur_node->int_index){
	data->ML_constraint_ptrs_two[int_loop_count] = new_super;
	new_super->int_constraint = 1;
      }
    }   
    // loop over all CL constraints
    for(int_inner_loop = 0; int_inner_loop < data->int_num_CL_constraints; int_inner_loop++){
      // if the point is involved in an CL constraint
      if(data->CL_constraints_array[int_inner_loop][0] == cur_node->int_index){
	data->CL_constraint_ptrs_one[int_loop_count] = new_super;
	new_super->int_constraint = 1;
      }
      else if(data->CL_constraints_array[int_loop_count][1] == cur_node->int_index){
	data->CL_constraint_ptrs_two[int_loop_count] = new_super;
	new_super->int_constraint = 1;
      }
    }
  }
  */

  cur_super = new_super;
  new_super = NULL;
  int_super_point_count = 1;
  cur_node = cur_node->pNext;

  while(cur_node != NULL){
 
    new_super = (struct SUPER_POINT *)malloc(sizeof(struct SUPER_POINT));
    new_super->int_super_point_num = int_super_point_count;
    new_super->int_cluster = -1;
    new_super->int_constraint = 0;
    new_super->head = cur_node;
    new_super->int_weight = 1;


   
    if(data->int_algorithm_version == 2){
      // loop over first two columns of the ML constraint array
      for(int_outer_loop = 0; int_outer_loop < 2; int_outer_loop++){
	// loop over all ML constraints
	for(int_inner_loop = 0; int_inner_loop < data->int_num_ML_constraints; int_inner_loop++){
	  // if the point is involved in an ML constraint
	  if(data->ML_constraints_array[int_inner_loop][int_outer_loop] == cur_node->int_index){
	    data->ML_constraint_ptrs[int_inner_loop][int_outer_loop] = new_super;
	    new_super->int_constraint = 1;
	  }
	}
      }
      // loop over first two columns of the CL constraint array
      for(int_outer_loop = 0; int_outer_loop < 2; int_outer_loop++){
	// loop over all CL constraints
	for(int_inner_loop = 0; int_inner_loop < data->int_num_CL_constraints; int_inner_loop++){
	  // if the point is involved in an CL constraint
	  if(data->CL_constraints_array[int_inner_loop][int_outer_loop] == cur_node->int_index){
	    data->CL_constraint_ptrs[int_inner_loop][int_outer_loop] = new_super;
	    new_super->int_constraint = 1;
	  }
	}
      }
    }
    
    /*
    if(data->int_algorithm_version == 2){
      // loop over all ML constraints
      for(int_loop_count = 0; int_loop_count < data->int_num_ML_constraints; int_loop_count++){
	// if the point is involved in an ML constraint
	if(data->ML_constraints_array[int_loop_count][0] == cur_node->int_index){
	  data->ML_constraint_ptrs_one[int_loop_count] = new_super;
	  new_super->int_constraint = 1;
	}
	else if(data->ML_constraints_array[int_loop_count][1] == cur_node->int_index){
	  data->ML_constraint_ptrs_two[int_loop_count] = new_super;
	  new_super->int_constraint = 1;
	}
      }   
      // loop over all CL constraints
      for(int_inner_loop = 0; int_inner_loop < data->int_num_CL_constraints; int_inner_loop++){
	// if the point is involved in an CL constraint
	if(data->CL_constraints_array[int_inner_loop][0] == cur_node->int_index){
	  data->CL_constraint_ptrs_one[int_loop_count] = new_super;
	  new_super->int_constraint = 1;
	}
	else if(data->CL_constraints_array[int_loop_count][1] == cur_node->int_index){
	  data->CL_constraint_ptrs_two[int_loop_count] = new_super;
	  new_super->int_constraint = 1;
	}
      }
    }
    */
    
    new_super->pPrev = cur_super;
    new_super->pNext = NULL;
    cur_super->pNext = new_super;
  

    new_super->pClusterPrev = NULL;
    new_super->pClusterNext = NULL;
    cur_super = new_super;
    new_super = NULL;
    int_super_point_count++;
    cur_node = cur_node->pNext;
  }

  data->super_point_centroids = data->data_array;

}



// This function calculates the distance between two data points.  It receives the
// coordinates of the two points as float arrays as well as the number of 
// dimensions.  It returns the scalar Euclidean distance between the points.

float Distance(float * point_one, float * point_two, int int_dim)
{
  float difference;            
  float squared_difference;
  int int_loop_count;
  
  float distance;
  float sum = 0;

  // loop over all coordinates
  for(int_loop_count = 0; int_loop_count < int_dim; int_loop_count++){

    // take difference between point one and point two for current coordinate
    difference = point_one[int_loop_count] - point_two[int_loop_count]; 

    // square the difference
    squared_difference = difference * difference;

    // add the squared difference to the sum of squared differences
    sum = sum + squared_difference;
  }

  // take the square root of the sum to calculate the distance
  distance = sqrt(sum);

  return distance;
}


// Dumps the contents of a DATA struct to stdout, section by section:
// the coordinate array, the ML and CL constraint arrays, the per-point
// constraint involvement lists (skipped when int_algorithm_version is 0),
// the members of every cluster, the label of every data point, and finally
// the distinct labels.

void PrintData(struct DATA * data_to_print)
{
  struct CONSTRAINT * constraint;
  struct SUPER_POINT * super;
  struct DATA_POINT * member;

  int row;
  int col;

  // coordinates: one line per data point, prefixed with the point's index
  for(row = 0; row < data_to_print->int_num_data_points; row++){
    printf("%d ", row);
    for(col = 0; col < data_to_print->int_dimensions; col++){
      printf("%f ", data_to_print->data_array[row][col]);
    }
    printf("\n");
  }

  // ML constraints: CONSTRAINT_COLS + 2 columns are printed per row
  printf("\nML Constraints\n");
  for(row = 0; row < data_to_print->int_num_ML_constraints; row++){
    printf("%d\t", row);
    for(col = 0; col < CONSTRAINT_COLS + 2; col++){
      printf("%d\t", data_to_print->ML_constraints_array[row][col]);
    }
    printf("\n");
  }

  // CL constraints: same layout as the ML table
  printf("\nCL Constraints\n");
  for(row = 0; row < data_to_print->int_num_CL_constraints; row++){
    printf("%d\t", row);
    for(col = 0; col < CONSTRAINT_COLS + 2; col++){
      printf("%d\t", data_to_print->CL_constraints_array[row][col]);
    }
    printf("\n");
  }

  // constraint involvement lists are only read for non-zero versions
  if(data_to_print->int_algorithm_version != 0){
    printf("\nConstraint Involvement\n");

    for(row = 0; row < data_to_print->int_num_data_points; row++){
      printf("%d\t", row);

      // ML partners are tagged +1, CL partners -1
      for(constraint = data_to_print->ML_constraint_involvement[row][HEAD];
          constraint != NULL;
          constraint = constraint->pNext){
        printf("(+1 %d) ", constraint->int_other_point);
      }
      for(constraint = data_to_print->CL_constraint_involvement[row][HEAD];
          constraint != NULL;
          constraint = constraint->pNext){
        printf("(-1 %d) ", constraint->int_other_point);
      }
      printf("\n");
    }
  }

  // cluster membership: the cluster number, then the index of every data
  // point in every super point of that cluster, on the following line
  printf("\nCluster Lists\n");
  for(row = 0; row < data_to_print->int_num_clusters; row++){
    printf("%d\n", row);
    for(super = data_to_print->clusters[row]; super != NULL; super = super->pClusterNext){
      for(member = super->head; member != NULL; member = member->pSetNext){
        printf("%d ", member->int_index);
      }
    }
    printf("\n");
  }

  // label of every data point
  printf("\nLabels\n");
  for(row = 0; row < data_to_print->int_num_data_points; row++){
    printf("%d %s\n", row, data_to_print->labels_array[row]);
  }

  // distinct labels, looked up through the distinct_labels index array
  printf("\n%d Distinct Labels\n", data_to_print->int_num_labels);
  for(row = 0; row < data_to_print->int_num_labels; row++){
    printf("%d %s\n", row, data_to_print->labels_array[data_to_print->distinct_labels[row]]);
  }

  printf("\n\n");
}


// Writes the clustering outcome to two files in the current directory:
//
//   assignment.results  the int_cluster value of every node in the
//                       data->head_point list, one per line, in list order
//   centroid.results    one line per cluster: the cluster number followed by
//                       its centroid coordinates, tab separated
//
// Each centroid coordinate is written as value * (max - min) + min using the
// per-dimension data->maxes / data->mins entries.
// NOTE(review): presumably the centroids are stored min-max normalised and
// this restores the original scale -- confirm against the code that scales
// the input.
//
// If either file cannot be opened, a message is printed to stderr and the
// program exits with status 1.

void WriteResults(struct DATA * data)
{
  FILE * out;                   // file currently being written
  struct DATA_POINT * point;    // cursor over the data point list
  int cluster;
  int dim;
  float flt_value;              // centroid coordinate being rescaled

  // ---- cluster assignments ----
  out = fopen("assignment.results", "w");
  if(out == NULL){
    fprintf(stderr, "\nThe results file could not be created.  ");
    fprintf(stderr, "The program will now terminate.\n\n");
    exit(1);
  }

  for(point = data->head_point; point != NULL; point = point->pNext){
    fprintf(out, "%d\n", point->int_cluster);
  }

  fclose(out);

  // ---- cluster centroids ----
  out = fopen("centroid.results", "w");
  if(out == NULL){
    fprintf(stderr, "\nThe centroids file could not be created.  ");
    fprintf(stderr, "The program will now terminate.\n\n");
    exit(1);
  }

  for(cluster = 0; cluster < data->int_num_clusters; cluster++){
    fprintf(out, "%d\t", cluster);

    for(dim = 0; dim < data->int_dimensions; dim++){
      // rescale the stored coordinate before writing it
      flt_value = data->cluster_centroids[cluster][dim];
      flt_value = flt_value * (data->maxes[dim] - data->mins[dim]) + data->mins[dim];
      fprintf(out, "%f\t", flt_value);
    }

    fprintf(out, "\n");
  }

  fclose(out);
}
