// copkmeans.c
// Evan Lord
// Created: July 31, 2007
// Last Modified: July 31, 2007


#include <stdlib.h>
#include <stdio.h>
#include <mcheck.h>
#include <math.h>
#include <mcheck.h>
#include "constants.h"
#include "struct_def.h"


// This function uses ML constraints to generate connected components of
// multiple data points.  It receives the DATA struct containing the
// ML constraint array and updates the struct with a linked list of
// "super points" which can be connected components or individual data
// points.
//
// Fields of data written here: clusters (array of empty cluster heads),
// dendrogram[0] (set / "super point" number of every data point),
// head_point (list of all data points), super_point_head (list of all
// "super points"), Kmax (number of "super points") and
// super_point_centroids (mean coordinates of each "super point").
//
// The program is terminated with exit(1) when memory cannot be allocated
// or when a CL constraint links two points of the same connected component.

void MLClosure(struct DATA * data)
{
  // pointers used in linked list manipulation

  struct DATA_POINT * new_node;
  struct DATA_POINT * cur_node;
  struct DATA_POINT * temp_node;
  struct DATA_POINT * cur_head;
  struct DATA_POINT * cur_tail;
  struct DATA_POINT * set_one_tail;
  struct DATA_POINT * set_two_head;
  struct DATA_POINT * set_two_tail;

  struct SUPER_POINT * new_super;
  struct SUPER_POINT * cur_super;

  struct CONSTRAINT * cur_constraint;

  // loop counters
  int int_loop_count;
  int int_inner_loop;

  int int_point_one;          // index of first point involved in ML constraint
  int int_point_two;          // index of second point involved in ML constraint
  int int_super_point_count;  // number of "super points" after creating connected components
  int int_num_nodes;          // number of points in a connected component
  int int_next_point;         // index of the next set list to examine

  float temp[data->int_dimensions];   // coordinate sums for one "super point"

  // head and tail of the list of data points belonging to each set
  struct DATA_POINT * set_lists[data->int_num_data_points][2];

  // allocate memory for the array containing head pointers for each cluster
  if((data->clusters = (struct SUPER_POINT **)malloc(data->int_num_clusters * sizeof(struct SUPER_POINT *))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }

  // every cluster starts out empty; no SUPER_POINT is allocated here because
  // the head pointers only ever refer to nodes of the "super point" list
  for(int_loop_count = 0; int_loop_count < data->int_num_clusters; int_loop_count++){
    data->clusters[int_loop_count] = NULL;
  }

  // allocate memory for the array to hold the set assignments of all points
  if((data->dendrogram = (int **)malloc(sizeof(int *))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  if((data->dendrogram[0] = (int *)malloc(data->int_num_data_points * sizeof(int))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }

  // create linked list of nodes containing a data point index and the cluster
  // to which the data point is assigned
  if((new_node = (struct DATA_POINT *)malloc(sizeof(struct DATA_POINT))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  new_node->int_index = 0;             // start with data point 0
  new_node->int_cluster = -1;          // make sure it's not assigned to a cluster
  new_node->int_set = 0;               // initially in set 0
  new_node->pPrev = NULL;
  new_node->pNext = NULL;
  new_node->pClusterPrev = NULL;
  new_node->pClusterNext = NULL;
  new_node->pSetPrev = NULL;
  new_node->pSetNext = NULL;
  data->head_point = new_node;         // make this node the head of the data point list
  set_lists[0][HEAD] = new_node;       // make this node the head of set 0 list
  set_lists[0][TAIL] = new_node;       // make this node the tail of set 0 list
  data->dendrogram[0][0] = 0;          // remember that this node is assigned to set 0
  cur_node = new_node;
  new_node = NULL;
  for(int_loop_count = 1; int_loop_count < data->int_num_data_points; int_loop_count++){
    if((new_node = (struct DATA_POINT *)malloc(sizeof(struct DATA_POINT))) == NULL){
      fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
      exit(1);
    }
    new_node->int_index = int_loop_count;   // create node for each data point
    new_node->int_cluster = -1;             // make sure they're not assigned to clusters
    new_node->int_set = int_loop_count;     // each node is initially in its own set
    new_node->pPrev = cur_node;             // connect new node to previous node
    new_node->pNext = NULL;
    new_node->pClusterPrev = NULL;
    new_node->pClusterNext = NULL;
    new_node->pSetPrev = NULL;
    new_node->pSetNext = NULL;
    cur_node->pNext = new_node;             // connect previous node to new node
    set_lists[int_loop_count][HEAD] = new_node;  // make this node the head of its set list
    set_lists[int_loop_count][TAIL] = new_node;  // make this node the tail of its set list
    data->dendrogram[0][int_loop_count] = int_loop_count;   // remember what set it's assigned to
    cur_node = new_node;
    new_node = NULL;
  }

  // loop through array of ML constraints
  for(int_loop_count = 0; int_loop_count < data->int_num_ML_constraints; int_loop_count++){

    // get the index of the first point involved in the current constraint
    int_point_one = data->ML_constraints_array[int_loop_count][0];
    // get the index of the second point involved in the current constraint
    int_point_two = data->ML_constraints_array[int_loop_count][1];

    // if point 1 and point 2 are assigned to different sets
    if(data->dendrogram[0][int_point_one] != data->dendrogram[0][int_point_two]){

      // mark the tail of the set point 1 belongs to
      set_one_tail = set_lists[data->dendrogram[0][int_point_one]][TAIL];
      // mark the head of the set point 2 belongs to
      set_two_head = set_lists[data->dendrogram[0][int_point_two]][HEAD];
      // mark the tail of the set point 2 belongs to
      set_two_tail = set_lists[data->dendrogram[0][int_point_two]][TAIL];

      // connect the tail of the set point 1 belongs to to the head of the set point 2
      // belongs to
      set_one_tail->pSetNext = set_two_head;
      set_lists[data->dendrogram[0][int_point_two]][HEAD] = NULL;

      // connect the head of the set point 2 belongs to to the tail of the set point 1
      // belongs to
      set_two_head->pSetPrev = set_one_tail;

      // update the tail of the combined set; the slot of set 2 must be cleared
      // before the dendrogram entries of its points are rewritten below
      set_lists[data->dendrogram[0][int_point_one]][TAIL] = set_two_tail;
      set_lists[data->dendrogram[0][int_point_two]][TAIL] = NULL;

      // loop through all points belonging to the set originally containing point 2
      while(set_two_head != NULL){
        data->dendrogram[0][set_two_head->int_index] = data->dendrogram[0][int_point_one];  // change set assignment
        set_two_head = set_two_head->pSetNext;  // move to next point
      }
      set_two_head = NULL;

    }

  }

  int_next_point = 1;
  cur_head = set_lists[0][HEAD];
  cur_tail = set_lists[0][TAIL];

  // loop until finding first non-empty set
  while(cur_head == NULL){
    cur_head = set_lists[int_next_point][HEAD];
    cur_tail = set_lists[int_next_point][TAIL];
    int_next_point++;  // maintain index of first non-empty set
  }

  // Create a "super point" for each set of data point(s)
  if((new_super = (struct SUPER_POINT *)malloc(sizeof(struct SUPER_POINT))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  new_super->int_super_point_num = 0;     // start with "super point" 0
  new_super->int_cluster = -1;            // make sure it's not assigned to a cluster yet
  new_super->int_constraint = 0;
  new_super->head = cur_head;             // connect it to the first non-empty set
  new_super->tail = cur_tail;

  // renumber the members so the dendrogram holds "super point" numbers
  temp_node = cur_head;
  while(temp_node != NULL){
    data->dendrogram[0][temp_node->int_index] = 0;
    temp_node = temp_node->pSetNext;
  }

  new_super->pPrev = NULL;
  new_super->pNext = NULL;
  new_super->pClusterPrev = NULL;
  new_super->pClusterNext = NULL;
  data->super_point_head = new_super;     // make this the head of the "super point" list
  cur_super = new_super;
  new_super = NULL;
  int_super_point_count = 1;              // remember the number of "super points" created

  // loop through remainder of set lists
  for(int_loop_count = int_next_point; int_loop_count < data->int_num_data_points; int_loop_count++){

    cur_head = set_lists[int_loop_count][HEAD];
    cur_tail = set_lists[int_loop_count][TAIL];

    if(cur_head != NULL){    // if a set list is not empty

      // Create a super point for that set
      if((new_super = (struct SUPER_POINT *)malloc(sizeof(struct SUPER_POINT))) == NULL){
        fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
        exit(1);
      }
      new_super->int_super_point_num = int_super_point_count;
      new_super->head = cur_head;         // connect to non-empty set of data points
      new_super->tail = cur_tail;

      // renumber the members so the dendrogram holds "super point" numbers
      temp_node = cur_head;
      while(temp_node != NULL){
        data->dendrogram[0][temp_node->int_index] = int_super_point_count;
        temp_node = temp_node->pSetNext;
      }

      new_super->int_cluster = -1;        // make sure "super point" is not assigned to a cluster
      new_super->int_constraint = 0;
      new_super->pPrev = cur_super;       // connect this "super point" to the previous one
      new_super->pNext = NULL;
      cur_super->pNext = new_super;       // connect previous "super point" to this one
      new_super->pClusterPrev = NULL;
      new_super->pClusterNext = NULL;
      cur_super = new_super;
      new_super = NULL;
      int_super_point_count++;            // increment the number of "super points" created

    }
  }

  // check for CL conflicts within connected components: for every CL
  // constraint of every member, the other point of the constraint must not
  // be a member of the same "super point"
  cur_super = data->super_point_head;
  while(cur_super != NULL){
    cur_node = cur_super->head;
    while(cur_node != NULL){
      cur_constraint = data->CL_constraint_involvement[cur_node->int_index][HEAD];
      while(cur_constraint != NULL){
        temp_node = cur_super->head;
        while(temp_node != NULL){
          if(cur_node != temp_node){
            // (col + 1) % 2 selects the other column of the constraint row
            if(temp_node->int_index == data->CL_constraints_array[cur_constraint->int_constraint_index][(cur_constraint->int_constraint_col + 1) % 2]){
              fprintf(stderr, "There is a conflict between ML and CL constraints.  The program will now terminate.\n");
              exit(1);
            }
          }
          temp_node = temp_node->pSetNext;
        }
        cur_constraint = cur_constraint->pNext;
      }
      cur_node = cur_node->pSetNext;
    }
    cur_super = cur_super->pNext;
  }

  data->Kmax = int_super_point_count;

  // allocate memory for the array to hold the super point centroids
  if((data->super_point_centroids = (float **)malloc(int_super_point_count * sizeof(float *))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  for(int_loop_count = 0; int_loop_count < int_super_point_count; int_loop_count++){
    if((data->super_point_centroids[int_loop_count] = (float *)malloc(data->int_dimensions * sizeof(float))) == NULL){
      fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
      exit(1);
    }
  }

  /************************Calculate "super point" centroids****************************/

  cur_super = data->super_point_head;  // start at head of "super point" list
  while(cur_super != NULL){            // walk entire list of "super points"

    // initialize temp array to zero
    for(int_loop_count = 0; int_loop_count < data->int_dimensions; int_loop_count++){
      temp[int_loop_count] = 0;
    }

    int_num_nodes = 0;    // initialize number of points in "super point" to zero

    cur_node = cur_super->head;       // start at first data point in current "super point"
    while(cur_node != NULL){          // loop through all data points contained in "super point"

      cur_node->int_set = cur_super->int_super_point_num;   // update set assignment numbers

      // add the coordinates of the current data point to the temp array
      for(int_inner_loop = 0; int_inner_loop < data->int_dimensions; int_inner_loop++){
        temp[int_inner_loop] = temp[int_inner_loop] + data->data_array[cur_node->int_index][int_inner_loop];
      }

      cur_node = cur_node->pSetNext;   // move to the next data point in the "super point"
      int_num_nodes++;                 // increment the number of data points in the "super point"
    }

    // divide the temp array by the number of points in the current "super point"
    for(int_inner_loop = 0; int_inner_loop < data->int_dimensions; int_inner_loop++){
      temp[int_inner_loop] = temp[int_inner_loop] / int_num_nodes;
    }

    // update the current super point centroid coordinates with the temp array
    for(int_inner_loop = 0; int_inner_loop < data->int_dimensions; int_inner_loop++){
      data->super_point_centroids[cur_super->int_super_point_num][int_inner_loop] = temp[int_inner_loop];
    }

    cur_super->int_weight = int_num_nodes;      // save the number of nodes as the "super point" weight

    cur_super = cur_super->pNext;               // move to the next "super point" in the list
  }

}


// This function converts the per-data-point CL constraint lists into one
// list per "super point".  It receives the DATA struct (after Kmax, the
// "super point" list and dendrogram[0] have been filled in) and
//   1. allocates data->SP_CL_constraints, a HEAD/TAIL pair per "super point",
//   2. rewrites int_other_point of every constraint node from a data point
//      index to the number of the "super point" containing that data point,
//   3. moves the constraint nodes of every member point onto the list of
//      its "super point", leaving data->CL_constraint_involvement empty,
//   4. sorts every list by descending int_other_point and frees duplicate
//      nodes, so each other "super point" appears at most once.
// The program is terminated with exit(1) when memory cannot be allocated.

void SuperPointConstraints(struct DATA * data)
{
  int int_super_point_num;
  int int_dendro_num;
  int int_loop_count;
  int int_largest_index;

  struct SUPER_POINT * cur_super;

  struct CONSTRAINT * prev_constraint;      // node in front of largest_constraint
  struct CONSTRAINT * cur_constraint;
  struct CONSTRAINT * next_constraint;
  struct CONSTRAINT * remove_constraint;
  struct CONSTRAINT * largest_constraint;   // largest node found in the unsorted part
  struct CONSTRAINT * marked_constraint;    // first node of the unsorted part
  struct CONSTRAINT * marked_prev;          // last node of the sorted part

  struct DATA_POINT * head_point;

  // "super point" number of every data point
  int super_point_index[data->int_num_data_points];

  if((data->SP_CL_constraints = (struct CONSTRAINT ***)malloc(data->Kmax * sizeof(struct CONSTRAINT **))) == NULL){
    fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
    exit(1);
  }
  for(int_loop_count = 0; int_loop_count < data->Kmax; int_loop_count++){
    if((data->SP_CL_constraints[int_loop_count] = (struct CONSTRAINT **)malloc(2 * sizeof(struct CONSTRAINT *))) == NULL){
      fprintf(stderr, "\nMemory allocation error.  Closing program.\n\n");
      exit(1);
    }
    // initialize all pointers to NULL
    data->SP_CL_constraints[int_loop_count][HEAD] = NULL;
    data->SP_CL_constraints[int_loop_count][TAIL] = NULL;
  }

  // Create super point index: every data point sharing the dendrogram value
  // of a "super point" head belongs to that "super point"
  cur_super = data->super_point_head;
  while(cur_super != NULL){
    int_super_point_num = cur_super->int_super_point_num;
    head_point = cur_super->head;
    int_dendro_num = data->dendrogram[0][head_point->int_index];
    for(int_loop_count = 0; int_loop_count < data->int_num_data_points; int_loop_count++){
      if(data->dendrogram[0][int_loop_count] == int_dendro_num){
        super_point_index[int_loop_count] = int_super_point_num;
      }
    }
    cur_super = cur_super->pNext;
  }

  // Convert "other point" involved in constraint to "other super point" involved in constraint
  for(int_loop_count = 0; int_loop_count < data->int_num_data_points; int_loop_count++){
    cur_constraint = data->CL_constraint_involvement[int_loop_count][HEAD];
    while(cur_constraint != NULL){
      cur_constraint->int_other_point = super_point_index[cur_constraint->int_other_point];
      cur_constraint = cur_constraint->pNext;
    }
  }

  // Move the constraint list of every member point onto the list of its "super point"
  cur_super = data->super_point_head;
  while(cur_super != NULL){
    int_super_point_num = cur_super->int_super_point_num;
    for(int_loop_count = 0; int_loop_count < data->int_num_data_points; int_loop_count++){
      if(super_point_index[int_loop_count] != int_super_point_num){
        continue;
      }

      // A point without CL constraints contributes nothing.  It must be
      // skipped: appending its empty list would store NULL in TAIL, and the
      // next member with constraints would then overwrite HEAD and drop
      // everything collected so far.
      if(data->CL_constraint_involvement[int_loop_count][HEAD] == NULL){
        continue;
      }

      if(data->SP_CL_constraints[int_super_point_num][TAIL] == NULL){
        // first non-empty list becomes the "super point" list
        data->SP_CL_constraints[int_super_point_num][HEAD] = data->CL_constraint_involvement[int_loop_count][HEAD];
      }
      else{
        // append to the end of the "super point" list
        cur_constraint = data->SP_CL_constraints[int_super_point_num][TAIL];
        cur_constraint->pNext = data->CL_constraint_involvement[int_loop_count][HEAD];
      }
      data->SP_CL_constraints[int_super_point_num][TAIL] = data->CL_constraint_involvement[int_loop_count][TAIL];

      // the nodes now belong to the "super point" list
      data->CL_constraint_involvement[int_loop_count][HEAD] = NULL;
      data->CL_constraint_involvement[int_loop_count][TAIL] = NULL;
    }

    cur_super = cur_super->pNext;
  }

  // Sort constraint lists and remove repetitive elements.  This is a
  // selection sort on the singly linked list: each pass finds the largest
  // int_other_point in the unsorted part, frees later nodes carrying the
  // same value, and moves the largest node to the end of the sorted part.
  for(int_loop_count = 0; int_loop_count < data->Kmax; int_loop_count++){
    marked_constraint = data->SP_CL_constraints[int_loop_count][HEAD];
    marked_prev = NULL;
    if((marked_constraint != NULL) && (marked_constraint->pNext != NULL)){
      while(marked_constraint != NULL){
        cur_constraint = marked_constraint;
        next_constraint = cur_constraint->pNext;
        prev_constraint = marked_prev;
        largest_constraint = cur_constraint;
        int_largest_index = cur_constraint->int_other_point;
        while(next_constraint != NULL){
          if(next_constraint->int_other_point > int_largest_index){
            // new largest value; remember the node in front of it
            int_largest_index = next_constraint->int_other_point;
            largest_constraint = next_constraint;
            prev_constraint = cur_constraint;
            cur_constraint = next_constraint;
            next_constraint = next_constraint->pNext;
          }
          else if(next_constraint->int_other_point == int_largest_index){
            // duplicate of the current largest value; unlink and free it
            remove_constraint = next_constraint;
            next_constraint = remove_constraint->pNext;
            cur_constraint->pNext = next_constraint;
            free(remove_constraint);
            remove_constraint = NULL;
          }
          else{
            cur_constraint = next_constraint;
            next_constraint = next_constraint->pNext;
          }
        }

        if(largest_constraint == marked_constraint){
          // largest node is already in place
          marked_prev = marked_constraint;
          marked_constraint = marked_prev->pNext;
        }
        else if(marked_prev == NULL){
          // largest node becomes the new head of the list
          prev_constraint->pNext = largest_constraint->pNext;
          largest_constraint->pNext = marked_constraint;
          data->SP_CL_constraints[int_loop_count][HEAD] = largest_constraint;
          marked_prev = largest_constraint;
          marked_constraint = marked_prev->pNext;
        }
        else{
          // largest node is moved behind the sorted part
          prev_constraint->pNext = largest_constraint->pNext;
          largest_constraint->pNext = marked_constraint;
          marked_prev->pNext = largest_constraint;
          marked_prev = largest_constraint;
          marked_constraint = marked_prev->pNext;
        }
      }
    }

    // Sorting moves nodes and frees duplicates, so the stored TAIL may refer
    // to a node that is no longer last (or no longer exists); point it at
    // the real last node again.
    cur_constraint = data->SP_CL_constraints[int_loop_count][HEAD];
    if(cur_constraint != NULL){
      while(cur_constraint->pNext != NULL){
        cur_constraint = cur_constraint->pNext;
      }
    }
    data->SP_CL_constraints[int_loop_count][TAIL] = cur_constraint;
  }
}


// This function tells whether placing a "super point" into a cluster would
// break a CL constraint.  It receives the candidate "super point", the index
// of the target cluster and the DATA struct.
//
// For every data point of the candidate, each entry of that point's CL
// constraint involvement list is examined; the point on the other side of
// the constraint is looked for among all data points of all "super points"
// currently linked into the cluster.
//
// Returns 1 as soon as such a point is found, 0 when none is found.

int ViolateConstraints(struct SUPER_POINT * point_to_check, int int_cluster_number, struct DATA * data)
{
  struct DATA_POINT * member;           // data point of the candidate "super point"
  struct CONSTRAINT * link;             // CL constraint involving member
  struct SUPER_POINT * resident;        // "super point" already in the cluster
  struct DATA_POINT * resident_point;   // data point of resident

  for(member = point_to_check->head; member != NULL; member = member->pSetNext){
    for(link = data->CL_constraint_involvement[member->int_index][HEAD]; link != NULL; link = link->pNext){
      for(resident = data->clusters[int_cluster_number]; resident != NULL; resident = resident->pClusterNext){
        for(resident_point = resident->head; resident_point != NULL; resident_point = resident_point->pSetNext){
          // (col + 1) % 2 selects the other column of the constraint row
          if(resident_point->int_index == data->CL_constraints_array[link->int_constraint_index][(link->int_constraint_col + 1) % 2]){
            return 1;
          }
        }
      }
    }
  }

  return 0;
}


// "Super point" level version of the CL constraint check.  It receives the
// candidate "super point", the index of the target cluster and the DATA
// struct, and walks two lists side by side: the candidate's entry in
// data->SP_CL_constraints and the "super points" linked into the cluster.
//
// Whichever side currently holds the larger number is advanced, so a match
// can only be found reliably when both lists are in descending order.
// NOTE(review): the ordering of the cluster list is not established in this
// file - confirm it is kept in descending int_super_point_num order.
//
// Returns 1 when a constraint names a "super point" found in the cluster,
// 0 when either list runs out first.
int AlternateViolateConstraints(struct SUPER_POINT * point_to_check, int int_cluster_number, struct DATA * data)
{
  struct CONSTRAINT * link = data->SP_CL_constraints[point_to_check->int_super_point_num][HEAD];
  struct SUPER_POINT * resident = data->clusters[int_cluster_number];

  while(link != NULL && resident != NULL){
    if(link->int_other_point == resident->int_super_point_num){
      return 1;
    }

    if(link->int_other_point > resident->int_super_point_num){
      link = link->pNext;                  // constraint side is ahead
    }
    else{
      resident = resident->pClusterNext;   // cluster side is ahead
    }
  }

  return 0;
}
