
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_N_CLASSES 32      // capacity of classnames[]
#define MAX_STRLEN 64         // bytes per stored name, including the terminating '\0'
#define MAX_N_DIMENSIONS 64   // capacity of the per-dimension arrays
#define MAX_N_NAMES 256       // capacity of ptnames/svnames/fgnames (disabled stage 3 only)

// Copies the leading run of `src` that contains none of the characters in
// `stops` into `dst`. At most dstsize-1 characters are stored and `dst` is
// always NUL-terminated (dstsize must be at least 1).
// Returns the length of the run in `src`, which may exceed what was stored;
// *truncated is set to 1 when characters had to be dropped, 0 otherwise.
static size_t copy_field(char *dst, size_t dstsize, const char *src,
		const char *stops, int *truncated) {
	size_t runlen = strcspn(src, stops);
	size_t keep = (runlen < dstsize) ? runlen : dstsize - 1;

	memcpy(dst, src, keep);
	dst[keep] = '\0';
	*truncated = (keep < runlen);
	return runlen;
}

// Reads ../kddcup.names.txt, prints the class names and the dimension
// names/flags to stdout, and writes "<index> <name>" lines to
// ../processed/dimensions.txt.
// Returns 0 on success and EXIT_FAILURE when a file cannot be opened, read or
// written, or when the input exceeds the fixed array capacities above.
int main(void) {

	int numclasses;
	char classnames[MAX_N_CLASSES][MAX_STRLEN];
	int numdimensions;
	char dimensionnames[MAX_N_DIMENSIONS][MAX_STRLEN];
	int dimensionsymbolflags[MAX_N_DIMENSIONS];

	char ibuffer[512];
	FILE *fptr;
	FILE *fptr2;
	int i;
	int truncated;
	const char *cursor;

	// Locals referenced only by the disabled stages 3-5 further down; they are
	// kept so those stages can be re-enabled without touching the declarations.
	int numptnames, numsvnames, numfgnames;
	char ptnames[MAX_N_NAMES][MAX_STRLEN];
	char svnames[MAX_N_NAMES][MAX_STRLEN];
	char fgnames[MAX_N_NAMES][MAX_STRLEN];
	int symbolics[MAX_N_DIMENSIONS];
	float continuous[MAX_N_DIMENSIONS];
	int cind, cind2;
	char tempname[512];
	int found;
	int nameind;

	// 1. read in the class names from kddcup.names.txt
	fptr = fopen("../kddcup.names.txt","rt");
	if (fptr == NULL) {
		perror("../kddcup.names.txt");
		return EXIT_FAILURE;
	}
	if (fgets(ibuffer, sizeof ibuffer, fptr) == NULL) {
		fprintf(stderr, "../kddcup.names.txt: missing class name line\n");
		fclose(fptr);
		return EXIT_FAILURE;
	}
	// The first line is a comma-separated list terminated by '.'; a line end
	// is also accepted as a terminator so a missing '.' does not put the
	// newline into the last class name.
	numclasses = 0;
	cursor = ibuffer;
	while (*cursor != '.' && *cursor != '\0' && *cursor != '\n' && *cursor != '\r') {
		if (numclasses >= MAX_N_CLASSES) {
			fprintf(stderr, "too many class names (limit %d)\n", MAX_N_CLASSES);
			fclose(fptr);
			return EXIT_FAILURE;
		}
		cursor += copy_field(classnames[numclasses], MAX_STRLEN, cursor, ".,\r\n", &truncated);
		if (truncated) {
			fprintf(stderr, "warning: class name truncated to \"%s\"\n", classnames[numclasses]);
		}
		if (*cursor == ',') cursor++;
		numclasses++;
	}
	printf("******* classes *******\n");
	for (i=0;i<numclasses;i++) {
		printf("%d %s\n", i, classnames[i]);
	}

	// 2. read in which dimensions are symbolic from kddcup.names.txt
	// Each remaining line is parsed as "<name>: <type>"; the flag is 0 when the
	// type starts with 'c' and 1 otherwise.
	// The loop is driven by the fgets() result: testing feof() before reading
	// re-parses the stale buffer after the last line and counts it twice.
	numdimensions = 0;
	while (fgets(ibuffer, sizeof ibuffer, fptr) != NULL) {
		const char *colon = strchr(ibuffer, ':');

		// Lines of 3 characters or fewer were never counted as dimensions;
		// lines without a ':' cannot be parsed at all, so skip both.
		if (strlen(ibuffer) <= 3 || colon == NULL) continue;

		if (numdimensions >= MAX_N_DIMENSIONS) {
			fprintf(stderr, "too many dimensions (limit %d)\n", MAX_N_DIMENSIONS);
			fclose(fptr);
			return EXIT_FAILURE;
		}
		copy_field(dimensionnames[numdimensions], MAX_STRLEN, ibuffer, ":", &truncated);
		if (truncated) {
			fprintf(stderr, "warning: dimension name truncated to \"%s\"\n", dimensionnames[numdimensions]);
		}
		// The type keyword starts two characters past the colon; check
		// colon[1] first so the read never goes beyond the terminating NUL.
		if (colon[1] != '\0' && colon[2] == 'c') {
			dimensionsymbolflags[numdimensions] = 0;
		} else {
			dimensionsymbolflags[numdimensions] = 1;
		}
		numdimensions++;
	}
	if (ferror(fptr)) {
		perror("../kddcup.names.txt");
		fclose(fptr);
		return EXIT_FAILURE;
	}
	fclose(fptr);

	fptr2 = fopen("../processed/dimensions.txt","wt");
	if (fptr2 == NULL) {
		perror("../processed/dimensions.txt");
		return EXIT_FAILURE;
	}
	printf("******* dimensions *******\n");
	for (i=0;i<numdimensions;i++) {
		printf("%d %s %d\n", i, dimensionnames[i], dimensionsymbolflags[i]);
		fprintf(fptr2,"%d %s\n", i, dimensionnames[i]);
	}
	// fclose() flushes buffered output, so a failure here means lost data.
	if (fclose(fptr2) != 0) {
		perror("../processed/dimensions.txt");
		return EXIT_FAILURE;
	}

	// NOTE(review): stages 3-5 below are disabled and left exactly as they were.
	// Before re-enabling them: they use the same while(!feof()) pattern fixed
	// above, scan for ',' / '.' without checking for the end of the line, do
	// not check fopen(), and leave nameind unset when a name is not found.

	// 3. read one pass data file to get all the symbolic names
	/*
	fptr = fopen("../kddcup.data_10_percent.txt","rt");
	numptnames = numsvnames = numfgnames = 0;
	while (!feof(fptr)) {
		fgets(ibuffer,512,fptr);
		if (strlen(ibuffer)>3) {
			// read to the first comma
			cind = 0;
			while (ibuffer[cind]!=',') cind++;
			// read ptname
			cind++;
			cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2] = ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			found = 0;
			for (i=0;i<numptnames;i++) {
				if (strcmp(tempname,ptnames[i])==0) found = 1;
			}
			if(found==0) {
				strcpy(ptnames[numptnames],tempname);
				numptnames++;
			}
			// read svname	
			cind++;
			cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2] = ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			found = 0;
			for (i=0;i<numsvnames;i++) {
				if (strcmp(tempname,svnames[i])==0) found = 1;
			}
			if(found==0) {
				strcpy(svnames[numsvnames],tempname);
				numsvnames++;
			}	
			// read fgname
			cind++;
			cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2] = ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			found = 0;
			for (i=0;i<numfgnames;i++) {
				if (strcmp(tempname,fgnames[i])==0) found = 1;
			}
			if(found==0) {
				strcpy(fgnames[numfgnames],tempname);
				numfgnames++;
			}
		}
	}
	fclose(fptr);
	for (i=0;i<numptnames;i++) {
		printf("%d %s\n", i, ptnames[i]);
	}
	for (i=0;i<numsvnames;i++) {
		printf("%d %s\n", i, svnames[i]);
	}
	for (i=0;i<numfgnames;i++) {
		printf("%d %s\n", i, fgnames[i]);
	}
	*/
	// 4. read second pass data file output spaces and numbers only
	/*
	fptr = fopen("../kddcup.data_10_percent.txt","rt");
	fptr2 = fopen("../processed/data.txt","wt");
	while (!feof(fptr)) {
		fgets(ibuffer,512,fptr);
		if (strlen(ibuffer)>3) {
			// read duration
			cind = 0; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			continuous[0] = atof(tempname);
			// read pt
			cind++; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numptnames;i++) {
				if (strcmp(ptnames[i],tempname)==0) nameind = i;
			}
			symbolics[1] = nameind;
			// read sv
			cind++; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numsvnames;i++) {
				if (strcmp(svnames[i],tempname)==0) nameind = i;
			}
			symbolics[2] = nameind;
			// read fg
			cind++; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numfgnames;i++) {
				if (strcmp(fgnames[i],tempname)==0) nameind = i;
			}
			symbolics[3] = nameind;
			// read remaining dimensions
			for (i=4;i<numdimensions;i++) {
				cind++; cind2 = 0;
				while (ibuffer[cind]!=',') {
					tempname[cind2]=ibuffer[cind];
					cind++; cind2++;
				}
				tempname[cind2]='\0';
				if (dimensionsymbolflags[i]==0) {
					continuous[i] = atof(tempname);
				} else if (dimensionsymbolflags[i]==1) {
					symbolics[i] = atoi(tempname);
				}
			}
			// read class
			cind++; cind2 = 0;
			while (ibuffer[cind]!='.') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numclasses;i++) {
				if (strcmp(classnames[i],tempname)==0) nameind = i;
			}
			// output
			for (i=0;i<numdimensions;i++) {
				if (dimensionsymbolflags[i]==0) {
					fprintf(fptr2, "%f ", continuous[i]);
				} else if (dimensionsymbolflags[i]==1) {
					fprintf(fptr2, "%d ", symbolics[i]);
				}
			}
			fprintf(fptr2,"%d\n",nameind);
		}
	}
	fclose(fptr);
	fclose(fptr2);
	
	// 5. output symbolic/continuous flags file
	fptr = fopen("../processed/symbolics.txt","wt");
	for (i=0;i<numdimensions;i++) {
		fprintf(fptr,"%d ",dimensionsymbolflags[i]);
	}
	fprintf(fptr,"\n");
	fclose(fptr);
	
	*/

	return 0;

}

