// This file implements the PPH algorithm introduced in
// Vineet Bafna, Dan Gusfield, Giuseppe Lancia and Shibu Yooseph. Haplotyping as Perfect Phylogeny: A direct approach. Technical Report, UC Davis, Department of Computer Science, July 17, 2002.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#include "build_tree.h"
#include "tree.h"
#include "queue.h"
#include "pph.h"

int old[MAXSIZE];
int divide[MAXSIZE];
int divide1[MAXSIZE];
int ptr = 0;

struct NODE {
	struct NODE *next;
	struct NODE *next2;
	struct NODE *prev;
	int num;
};

NODE Ef[MAXSIZE];
NODE En[MAXSIZE];
SearchTree P,C,Et,Es;
int component[MAXSIZE];

// Perform DFS on all the edges
void depth_first_search1(int v) {
	old[v] = 1;
	NODE *tt = Ef[v].next;
	divide1[ptr++] = v;
	
	while(tt!=NULL) {
		if(old[tt->num]==0)
			depth_first_search1(tt->num);
		tt = tt->next;
	}
	
	tt = En[v].next;
	while(tt!=NULL) {
		if(old[tt->num]==0)
			depth_first_search1(tt->num);
		tt = tt->next;
	}
}

// Perform DFS on the colored edges
void depth_first_search2(NODE *Ef,int v,int color) {
	old[v] = 1;
	NODE *tt = Ef[v].next;
	component[v] = color;
	divide[ptr++] = v;
	
	while(tt!=NULL) {
		if(old[tt->num]==0)
			depth_first_search2(Ef,tt->num,color);
		tt = tt->next;
	}
}

bool expand00(char a,char b) {
	if(a=='0' && b=='0')
		return true;
	else if(a=='2' && b=='0')
		return true;
	else if(a=='0' && b=='2')
		return true;
	else if(a=='2' && b=='1')
		return true;
	else if(a=='1' && b=='2')
		return true;
	else if(a=='0' && b=='1')
		return true;
	else
		return false;
}

bool expand01(char a,char b) {
	if(a=='0' && b=='1')
		return true;
	else if(a=='0' && b=='2')
		return true;
	else
		return false;
}

bool expand10(char a,char b) {
	if(a=='1' && b=='0')
		return true;
	else if(a=='2' && b=='0')
		return true;
	else
		return false;
}

bool expand11(char a,char b) {
	if(a=='1' && b=='1')
		return true;
	else if(a=='2' && b=='1')
		return true;
	else if(a=='1' && b=='2')
		return true;
	else
		return false;
}

// Add the edge (u,v) into Ef
void unionf(int u,int v) {
	NODE *newnode = new NODE;
	newnode->num = v;
	newnode->next = Ef[u].next;
	newnode->next2 = Ef[u].next;
	newnode->prev = NULL;
	
	if(Ef[u].next!=NULL)
		Ef[u].next->prev = newnode;
		
	Ef[u].next = newnode;
	Ef[u].next2 = newnode;
}

// Add the edge (u,v) into En
void unionn(int u,int v) {
	NODE *newnode = new NODE;
	newnode->num = v;
	newnode->next = En[u].next;
	newnode->prev = NULL;
	
	if(En[u].next!=NULL)
		En[u].next->prev = newnode;
		
	En[u].next = newnode;
}

// remove the edge (v,u) from En
bool removen(int v,int u) {
	NODE *neighbor;
	neighbor = En[v].next;
	int hit = 0;
	
	while(neighbor!=NULL) {
		if(neighbor->num == u) {
			hit = 1;
			break;
		}
		neighbor = neighbor->next;
	}
	
	if(hit==1) {
		if(neighbor->prev!=NULL) {
			if(neighbor->next == NULL)
				neighbor->prev->next = NULL;
			else {
				neighbor->prev->next = neighbor->next;
				neighbor->next->prev = neighbor->prev;
			}
		} else {
			if(neighbor->next == NULL)
				En[v].next = NULL;
			else {
				En[v].next = neighbor->next;	
				neighbor->next->prev = NULL;
			}
		}
	} else {
		return false;
	}
	return true;
}

// display the content of Ef (for debug)
void showEf() {
	int i=0;
	NODE *temp;
	
	for(i=0;i<1000;i++) {
		temp = Ef[i].next;
		
		if(temp!=NULL)
			printf("%d",i+1);
			
		while(temp!=NULL) {
			printf(" %d",temp->num+1);
			temp = temp->next;
		}
		if(Ef[i].next!=NULL)
			printf("\n");
	}
	printf("end showing ef\n");	
}

// display the content of En (for debug)
void showEn() {
	int i=0;
	NODE *temp;
	
	for(i=0;i<1000;i++) {
		temp = En[i].next;
		
		if(temp!=NULL)
			printf("%d",i+1);
			
		while(temp!=NULL) {
			printf(" %d",temp->num+1);
			temp = temp->next;
		}
		if(En[i].next!=NULL)
			printf("\n");
	}
	printf("end showing en\n");	
}

void ResetEf(int len) {
	int i;
	NODE *temp;
	
	for(i=0;i<len;i++) {
		temp = Ef[i].next;
		Ef[i].next2 = Ef[i].next;
	}
}

// BuildGraph scan each pair of columns in M and determine their relationships
bool BuildGraph(char *M,int m,int n,int *ex)  {
	int i,j,jp;
	bool Is00,Is01,Is10,Is11,Is22;

	for(j=0;j<m;j++) {
		for(jp=0;jp<j;jp++) {
			Is00 = Is01 = Is10 = Is11 = Is22 = false;
			for(i=0;i<n;i++) {
				if(M[i*m+jp]=='2' && M[i*m+j]=='2') 
					Is22 = true;
				else {
					if(expand00(M[i*m+jp],M[i*m+j]))
						Is00 = true;
					if(expand01(M[i*m+jp],M[i*m+j]))
						Is01 = true;
					if(expand10(M[i*m+jp],M[i*m+j]))
						Is10 = true;
					if(expand11(M[i*m+jp],M[i*m+j]))
						Is11 = true;
				}
			}
			
			if(Is01 && Is10 && Is11) // no tree possible in this case
				return false;
			else if(Is00 && Is11 && Is22) { // forced in-phase
				unionf(jp,j); // put edge (jp,j) into Ef
				ex[jp*m+j] = 0; // use 0 to represent forced in-phase
				
				unionf(j,jp);
				ex[j*m+jp] = 0;
			} else if(Is01 && Is10 && Is22) { // forced out-of-phase
				unionf(jp,j); // put edge (jp,j) into Ef
				ex[jp*m+j] = 1; // use 1 to represent forced out-of-phase
				
				unionf(j,jp);
				ex[j*m+jp] = 1;
			} else if(Is22) { // their relationship has not been decided
				unionn(jp,j); // put (jp,j) into En
				ex[jp*m+j] = -1;
				
				unionn(j,jp);
				ex[j*m+jp] = -1;
			}
		}
	}
	return true;
}

// if Ef is empty, arbitrarily select an edge from En and put it
// into Ef
void check_empty_Ef(Queue ET,int *ex,int column) {
	int i,j,v,u,hasEf = 0;
	for(i=0;i<column*column;i++) {
		if(ex[i]==1) {
			hasEf = 1;  // Ef is not empty
			break;
		}
	}
	
	if(hasEf==0) { // Ef is empty
		NODE *temp;
	
		// find an edge in En and put it into Ef
		for(j=0;j<MAXSIZE;j++) {
			temp = En[j].next;
		
			if(temp!=NULL) {
				v = j;
				u = temp->num;
				break;
			}
		}
	
		ElementTypeS *element = new ElementTypeS;
        element->v = v;
        element->u = u;
		Enqueue( *element, ET );
		Et = Insert2(v,u,Et);
	}
}

// check if (v,u) is in Ef or En
bool isEfEn(int v,int u) {
	NODE *neighbor;
	
	neighbor = Ef[v].next;
	while(neighbor!=NULL) {
		if(neighbor->num == u)
			return true;
		neighbor = neighbor->next;
	}
	
	neighbor = En[v].next;
	while(neighbor!=NULL) {
		if(neighbor->num == u)
			return true;
		neighbor = neighbor->next;
	}
	return false;
}

// exclusive or
int xxor(int x,int y) {
	if(x!=y)
		return 1;
	else
		return 0;
}

int *search,*ex,*next,*prev;

// The coloring process
// This procedure finds the triangles and color the uncolored edges
void process(int v,int m) {
	NODE *neighbor;
	int u;
	Position pos;

	neighbor = En[v].next;
	while(neighbor!=NULL) {
		u = neighbor->num; // select a neighbor u from the list of v's neighbor
		
		if(ex[v*m+u]==-1) { // the phase relation between v and u has not been decided
			if((pos = Find(u,P)) != NULL) {
				// u is already in the path. So DFS travels back to u and forms a circle
				Stack S = CreateStack(MAXSIZE);
				
				// the relationship betwen v and u has not been decided and
				// the edge (prev[v],u) is not in Ef and En. According to
				// Corollary 9 in the paper, (v,next[u]) must be in Ef and En. So we
				// push next[u] into stack for later operations.
				while(prev[v]!=-1 && next[u]!=-1 && ex[v*m+u]==-1 && !isEfEn(prev[v],u)) {
					ElementTypeS *element = new ElementTypeS;
            		element->u = u;
            		Push( *element, S );
            		u = next[u];
				}
				
				//(prev[v],u) is in Ef and En.
				if(prev[v]!=-1 && ex[v*m+u] == -1) {
					if(ex[prev[v]*m+u]!=-1) {
						if(ex[v*m+prev[v]]!=-2 && ex[v*m+prev[v]]!=-1) {
							ex[v*m+u] = xxor(ex[prev[v]*m+u],ex[v*m+prev[v]]);
							ex[u*m+v] = ex[v*m+u];
						} else {
							ex[v*m+u] = xxor(ex[prev[v]*m+u],ex[prev[v]*m+v]);
							ex[u*m+v] = ex[v*m+u];
						}
					
						Et = Delete2(u,v,Et);
						Et = Delete2(v,u,Et);
						Es = Delete2(u,v,Es);
						Es = Delete2(v,u,Es);											
					}
				}
				
				//(v,next[u]) is in Ef and En.
				while( !IsEmpty( S ) ) {
					ElementTypeS e = Top( S );
					u = e.u;
					
					if(ex[u*m+next[u]]!=-2 && ex[u*m+next[u]]!=-1) {
						ex[v*m+u] = xxor(ex[v*m+next[u]],ex[u*m+next[u]]);
						ex[u*m+v] = ex[v*m+u];
					} else {
						ex[v*m+u] = xxor(ex[v*m+next[u]],ex[next[u]*m+u]);
						ex[u*m+v] = ex[v*m+u];
					}
					
					Et = Delete2(v,u,Et);
					Et = Delete2(u,v,Et);
					Es = Delete2(v,u,Es);
					Es = Delete2(u,v,Es);
					Pop( S );
				}
				DisposeStack( S );
			} else if((pos = Find(u,C)) != NULL) {
				// u is not in the path but in the same connected component with
				// v, so this path crosses an old path. Perform almost the same step
				// as u is in the same path with v.
				Stack S = CreateStack(MAXSIZE);
				
				while(prev[v]!=-1 && prev[u]!=-1 && ex[v*m+u]==-1 && !isEfEn(prev[v],u)) {
					ElementTypeS *element = new ElementTypeS;
			
            				element->u = u;
            				Push( *element, S );
            				u = prev[u];
				}
				
				if(prev[v]!=-1 && ex[v*m+u] == -1) {
					if(ex[prev[v]*m+u]!=-1) {
						if(ex[v*m+prev[v]]!=-2 && ex[v*m+prev[v]]!=-1) {
							ex[v*m+u] = xxor(ex[prev[v]*m+u],ex[v*m+prev[v]]);
							ex[u*m+v] = ex[v*m+u];
						} else {
							ex[v*m+u] = xxor(ex[prev[v]*m+u],ex[prev[v]*m+v]);
							ex[u*m+v] = ex[v*m+u];
						}
					
						Et = Delete2(u,v,Et);
						Et = Delete2(v,u,Et);
						Es = Delete2(u,v,Es);
						Es = Delete2(v,u,Es);
					}
				}
				
				while( !IsEmpty( S ) ) {
					ElementTypeS e = Top( S );
					u = e.u;
					
					if(ex[u*m+prev[u]]!=-2 && ex[u*m+prev[u]]!=-1) {
						ex[v*m+u] = xxor(ex[v*m+prev[u]],ex[u*m+prev[u]]); // not sure
						ex[u*m+v] = ex[v*m+u];
					} else {
						ex[v*m+u] = xxor(ex[v*m+prev[u]],ex[prev[u]*m+u]);
						ex[u*m+v] = ex[v*m+u];
					}
						
					Et = Delete2(v,u,Et);
					Et = Delete2(u,v,Et);
					Es = Delete2(v,u,Es);
					Es = Delete2(u,v,Es);
					Pop( S );
				}
				DisposeStack( S );
			} else {
				// u is neither in the same path nor the same connected component
				// with v. So (u,v) is the edge between two different connected
				// components of colored edges.
				ElementTypeS *element = new ElementTypeS;
            	element->v = v;
            	element->u = u;
            			
            	if(component[v] != component[u]) {
					Et = Insert2(v,u,Et);
            	} else {
					Es = Insert2(v,u,Es);
            	}
			}
		}
		neighbor = neighbor->next;
	}
}

Stack Set; // A stack which tracks the paths in depth first search

// Find triangles and color the uncolored edges
void SetEdges(int v,int m) {
	NODE *neighbor;
	int u;
	
	P = Insert( v, P ); // P keeps track of the current DFS path
	C = Insert( v, C ); // C keeps track of the current connected component
	search[v] = 1; // mark v as a visited node
	
	process(v,m); // The coloring process

	ElementTypeS *element = new ElementTypeS;
    element->u = v;
    Push( *element, Set );

	while( !IsEmpty( Set ) ) {
		while(1) {
			ElementTypeS e = Top( Set );
			v = e.u; // v is the vertex on the top of Set
			neighbor = Ef[v].next2; // Ef[v].next2 points to the neighbors of v which have not been visited

			if(neighbor!=NULL) {
				Ef[v].next2 = neighbor->next;
				u = neighbor->num;
			} else
				break;
		
			if(search[u] == -1) {
				search[u] = 1;
				next[v] = u;
				prev[u] = v;
				P = Insert( u, P );
				C = Insert( u, C );
				process(u,m);
				element = new ElementTypeS;
				element->u = u;
				Push( *element, Set );
				P = Delete(u,P);
			}
		}

		Pop( Set );
	}

	P = Delete( v, P );
}

// Generate output haplotypes by the color relationships ex found before
// see the paper for the detailed description of this method
char *E2M(char *M,int row,int column,int *ex) {
	char *Mp = new char[2*row*column+1];
	
	int i,j,k;
	int temp;
	
	for(i=0;i<2*row*column;i++) 
		Mp[i] = '2';
		
	for(i=0;i<row;i++) {
		k = 10000;
		for(j=0;j<column;j++) {
			if(M[i*column+j] == '0') {
				Mp[2*i*column+j] = '0';
				Mp[(2*i+1)*column+j] = '0';
			} else if(M[i*column+j] == '1') {
				Mp[2*i*column+j] = '1';
				Mp[(2*i+1)*column+j] = '1';
			} else {
				if(k<j) {
					if(ex[k*column+j]==-1)
						ex[k*column+j] = 0;
						
					temp = xxor((int)Mp[2*i*column+k]-48,ex[k*column+j]);
						
					Mp[2*i*column+j] = (char)(temp+48);
					Mp[(2*i+1)*column+j] = 	(char)(1-temp+48);
				} else {
					Mp[2*i*column+j] = '1';
					Mp[(2*i+1)*column+j] = '0';
				}
			}
			
			if(M[i*column+j] == '2') 
				k = j;
		}
	}
	
	return Mp;
}

// Empty the linked list
void delete_link(NODE *E) {
	int i=0;
	NODE *temp,*temp1;
	
	for(i=0;i<1000;i++) {
		temp = E[i].next;
			
		while(temp!=NULL) {
			temp1 = temp->next;
			delete temp;
			temp = temp1;
		}
	}
}

// Use majority rule to generate the root vector
char *generate_default(char *cvector, char *M,int row,int column) {
	int i,j;
	int one,zero;
	
	for(i=0;i<column;i++) {
		one = 0;
		zero = 0;
		for(j=0;j<row;j++) {
			if(M[j*column+i]=='1')
				one++;
			else if(M[j*column+i]=='0')
				zero++;
		}
		if(zero >= one)
			cvector[i] = '0';
		else
			cvector[i] = '1';
	}
	cvector[i] = '\0';
	return cvector;
}

// read the root vector
char *read_vector(char *cvector,char *filename,int row, int num,char *M) {
	cvector = new char[num+1];
	FILE *fp;
	int i = 0;
	int tt;
	
	if(strcmp(filename,"d")!=0 && strcmp(filename,"m")!=0) {
		fp = fopen(filename,"r");
		
		for(i=0;i<num;i++) {	
			if(fscanf(fp,"%d",&tt)==EOF) {
				printf("vector information error!\n");
				exit(0);
			}
			cvector[i] = (char)tt+48;
		}
		cvector[i] = '\0';
		fclose(fp);
	} else if(strcmp(filename,"m")==0) {
		cvector = generate_default(cvector,M,row,num);
	} else if(strcmp(filename,"d")==0) {
		for(i=0;i<num;i++) 
			cvector[i] = '0';
		cvector[i] = '\0';
	}
	return cvector;
}

// convert the genotypes if the root vector is specified
char *convert_data(char *M, char *vector, int row, int column) {
	int i,j;
	
	for(i=0;i<column;i++) {
		if(vector[i] == '1') {
			for(j=0;j<row;j++) {
				if(M[j*column+i]=='1')
					M[j*column+i] = '0';
				else if(M[j*column+i]=='0')
					M[j*column+i] = '1';
			}
		}
	}
	
	return M;
}

// convert the haplotypes back if the root vector is specified
char *convert_back(char *output,int row,int column,char *cvector) {
	int i,j;
	row = row*2;
	for(i=0;i<column;i++) {
		if(cvector[i]=='1') {
			for(j=0;j<row;j++) {
				if(output[j*column+i]=='0')
					output[j*column+i] = '1';
				else if(output[j*column+i]=='1')
					output[j*column+i] = '0';
			}
		}
	}
	return output;
}

// The main function
double pph(void) {
	FILE *fp;
	int column,row,digit,u,v,color=1;
	int i = 0;
	char *M,*Mp;
	char fvector[100];
	char *cvector;
	
	fp = fopen("temp.1","r");
	fscanf(fp,"%d%d",&row,&column);
	
	M = new char[row*column+1];
	ex = new int[column*column+1];
	search = new int[column+1];
	next = new int[column+1];
	prev = new int[column+1];
	
	// read the input genotypes and put them into M
	while(fscanf(fp,"%d",&digit)!=EOF) {
		M[i] = (char)digit+48;
		i++;
	}
	
	fclose(fp);
	
	// ask user if the root vector is specified
	printf("Please input the file name of the vector (enter \"d\" for the default or \"m\" for the majority vector): ");
	scanf("%s",fvector);
	
	if(strcmp(fvector,"d")!=0 && strcmp(fvector,"m")!=0) {
		fp = fopen(fvector,"r");
		if(fp==NULL) {
			printf("Can not open file %s\n",fvector);
			exit(0);
		}
	}
	
	// read the root vector file or generate it by the majority rule
	cvector = read_vector(cvector,fvector,row,column,M);
	// convert the genotypes according to the root vector
	M = convert_data(M,cvector,row,column);
	
	long clk1 = clock(); // start the clock

	// initialize the parameters
	for(i=0;i<MAXSIZE;i++) {
		Ef[i].next = NULL;
		En[i].next = NULL;
	}
	
	for(i=0;i<column*column+1;i++)
		ex[i] = -2;
	
	for(i=0;i<column;i++) {
		prev[i] = -1;
		next[i] = -1;
	}

	// determine the relationships between pairs of columns
	bool issolution = BuildGraph(M,column,row,ex);
	
	// initialize some paramenters
	int l = 0;
	
	for(i=0;i<MAXSIZE;i++) {
		old[i]=0;
		divide1[i] = -2;
	}
	
	ptr = 0;
	
	// determine the uniqueness of the solutions
	// count the number of connected components of colored edges 
	// l represents the number
	for(i=0;i<column;i++) {
		if(old[i]==0) {
			l++;
			depth_first_search1(i);	
			divide1[ptr++] = -1;
		}
	}

	int k = 0;
	for(i=0;i<MAXSIZE;i++) {
		old[i]=0;
		component[i] = -1;
		divide[i] = -2;
	}
	
	ptr = i = 0;
	divide[0] = divide[1] = -1;
	ptr = 2;
	
	// count the number of connected components of uncolored edges
	// k represents the number
	while(divide1[i]!=-2) {
		if(old[divide1[i]]==0 && divide1[i]!=-1) {
			k++;
			depth_first_search2(Ef,divide1[i],color);
			color++;	
			divide[ptr++] = -1;
		} else if(divide1[i]==-1) {
			divide[ptr++] = -1;
		}
		i++;
	}
			
	if(!issolution) {
		printf("No Tree!\n");
		exit(0);
	}
	
	C = MakeEmpty( NULL );
	P = MakeEmpty( NULL );
	Et = MakeEmpty2( NULL );
	Es = MakeEmpty2( NULL );

	Queue ET;
	Queue ES;
	
	ET = CreateQueue(column*column/2);
	ES = CreateQueue(column*column/2);
	Set = CreateStack(MAXSIZE);

	for(i=0;i<column+1;i++)
		search[i] = -1;
	
	// check if Ef is empty
	check_empty_Ef(ET,ex,column);

	// begin the coloring process
	for(i=0;i<column;i++) {
		if(Ef[i].next!=NULL && search[i]==-1) {	
			SetEdges(i,column);
		}	
	}

	// handle the edges inside the same connected component
	while(Es!=NULL) {
				v = Es->Element;
				SearchTree sub = Es->Child;
				u = sub->Element;
				Es = Delete2(v,u,Es);
				SetEdges(v,column);
	}
	
	// handles the edges between two different connected components of colored edges
	while(Et!=NULL) {
				// arbitrarily select an edge (v,u) from En and put it into Ef
				v = Et->Element;
				SearchTree sub = Et->Child;
				
				u = sub->Element;
				Et = Delete2(v,u,Et);
				
				ex[v*column+u] = 0;
				ex[u*column+v] = 0;
					
				unionf(v,u);
				unionf(u,v);
				
				removen(v,u);
				removen(u,v);
				
				SetEdges(v,column);
	}

	delete search;
	delete prev;
	delete next;
	DisposeQueue( ET );
	DisposeQueue( ES );
	MakeEmpty(C);
	MakeEmpty(P);
	delete_link(Ef);
	delete_link(En);
	
	// Generate the haplotypes from ex
	Mp = E2M(M,row,column,ex);
	
	delete M;
	delete ex;
	
	long clk2 = clock(); // stop the clock
	
	// write it into a file
	char tt;
	fp = fopen("temp.1co","w");
	fprintf(fp,"%d %d\n",2*row,column);
	for(i=0;i<2*row;i++) {
		for(int j=0;j<column;j++) {
			tt = Mp[i*column+j];
			fprintf(fp,"%c ",tt);
		}
		fprintf(fp,"\n");
	}
	fclose(fp);

	// build a tree and check if they are consistent
	int pass = build_tree();
	
	if(pass==0) {
		printf("No tree!\n");
		exit(0);
	}
	
	// convert the data if a root vector is specified
	Mp = convert_back(Mp,row,column,cvector);

	// write it into a file
	fp = fopen("temp.1co","w");
	fprintf(fp,"%d %d\n",2*row,column);
	for(i=0;i<2*row;i++) {
		for(int j=0;j<column;j++) {
			tt = Mp[i*column+j];
			fprintf(fp,"%c ",tt);
		}
		fprintf(fp,"\n");
	}
	fclose(fp);
	
	fp = fopen("foroutput","w");
	fprintf(fp,"%d %d\n",2*row,column);
	for(i=0;i<2*row;i++) {
		for(int j=0;j<column;j++) {
			tt = Mp[i*column+j];
			fprintf(fp,"%c",tt);
		}
		fprintf(fp,"\n");
	}
	fclose(fp);
	
	delete Mp;	
	
	printf("Total running time: %f seconds\n",(float)(clk2-clk1)/CLOCKS_PER_SEC);	
	
	// count the number of uniqueness
	double diff = k-l;

	fp = fopen("divide","w");
	
	i = 0;
	while(divide[i]!=-2) {
		fprintf(fp,"%d ",divide[i]);	
		i++;
	}
	fclose(fp);
	
	return diff;
} 
