
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_N_CLASSES 64
#define MAX_STRLEN 64
#define MAX_N_DIMENSIONS 64
#define MAX_N_NAMES 256

int main() {

	int numclasses;
	char classnames[MAX_N_CLASSES][MAX_STRLEN];
	int classnums[MAX_N_CLASSES];
	int numdimensions;
	char dimensionnames[MAX_N_DIMENSIONS][MAX_STRLEN];
	int dimensionsymbolflags[MAX_N_DIMENSIONS];
	int numptnames, numsvnames, numfgnames;
	char ptnames[MAX_N_NAMES][MAX_STRLEN];
	char svnames[MAX_N_NAMES][MAX_STRLEN];
	char fgnames[MAX_N_NAMES][MAX_STRLEN];

	int symbolics[MAX_N_DIMENSIONS];
	float continuous[MAX_N_DIMENSIONS];


	char ibuffer[512];
	FILE *fptr;
	FILE *fptr2;
	int cind, cind2;
	int i;
	char tempname[512];
	int found;
	int nameind;

	// 1. read in the class names from kddcup.names.txt

	strcpy(classnames[0],"apache2"); classnums[0] = 2; 
	strcpy(classnames[1],"back"); classnums[1] = 2;
	strcpy(classnames[2],"buffer_overflow"); classnums[2] = 3;
	strcpy(classnames[3],"ftp_write"); classnums[3] = 4;
	strcpy(classnames[4],"guess_passwd"); classnums[4] = 4;
	strcpy(classnames[5],"httptunnel"); classnums[5] = 5;
	strcpy(classnames[6],"imap"); classnums[6] = 4;
	strcpy(classnames[7],"ipsweep"); classnums[7] = 1; 
	strcpy(classnames[8],"land"); classnums[8] = 2;
	strcpy(classnames[9],"loadmodule"); classnums[9] = 3; 
	strcpy(classnames[10],"mailbomb"); classnums[10] = 2;
	strcpy(classnames[11],"mscan"); classnums[11] = 1;
	strcpy(classnames[12],"multihop"); classnums[12] = 5;
	strcpy(classnames[13],"named"); classnums[13] = 4;
	strcpy(classnames[14],"neptune"); classnums[14] = 2; 
	strcpy(classnames[15],"nmap"); classnums[15] = 1;
	strcpy(classnames[16],"perl"); classnums[16] = 3;
	strcpy(classnames[17],"phf"); classnums[17] = 4;
	strcpy(classnames[18],"pod"); classnums[18] = 2;
	strcpy(classnames[19],"portsweep"); classnums[19] = 1;
	strcpy(classnames[20],"processtable"); classnums[20] = 2;
	strcpy(classnames[21],"ps"); classnums[21] = 3;
	strcpy(classnames[22],"rootkit"); classnums[22] = 3;
	strcpy(classnames[23],"saint"); classnums[23] = 1;
	strcpy(classnames[24],"satan"); classnums[24] = 1;
	strcpy(classnames[25],"sendmail"); classnums[25] = 4;
	strcpy(classnames[26],"smurf"); classnums[26] = 2;
	strcpy(classnames[27],"snmpgetattack"); classnums[27] = 4;
	strcpy(classnames[28],"snmpguess"); classnums[28] = 4;
	strcpy(classnames[29],"sqlattack"); classnums[29] = 3;
	strcpy(classnames[30],"teardrop"); classnums[30] = 2;
	strcpy(classnames[31],"udpstorm"); classnums[31] = 2;
	strcpy(classnames[32],"warezmaster"); classnums[32] = 2;
	strcpy(classnames[33],"worm"); classnums[33] = 4;
	strcpy(classnames[34],"xlock"); classnums[34] = 4;
	strcpy(classnames[35],"xsnoop"); classnums[35] = 4;
	strcpy(classnames[36],"xterm"); classnums[36] = 3;

	numclasses = 37;
	printf("******* classes *******\n");
	for (i=0;i<numclasses;i++) {
		printf("%d %s\n", i, classnames[i]);
	}



	// 2. read in which dimensions are symbolic from kddcup.names.txt
	numdimensions = 41;
	fptr = fopen("../processed/symbolics.txt","rt");
	for (i=0;i<numdimensions;i++) {
		fscanf(fptr,"%d ",&(dimensionsymbolflags[i]));
	}
	fclose(fptr);

	// 3. read training data file to get all the symbolic names
	fptr = fopen("../kddcup.data_10_percent.txt","rt");
	numptnames = numsvnames = numfgnames = 0;
	while (!feof(fptr)) {
		fgets(ibuffer,512,fptr);
		if (strlen(ibuffer)>3) {
			// read to the first comma
			cind = 0;
			while (ibuffer[cind]!=',') cind++;
			// read ptname
			cind++;
			cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2] = ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			found = 0;
			for (i=0;i<numptnames;i++) {
				if (strcmp(tempname,ptnames[i])==0) found = 1;
			}
			if(found==0) {
				strcpy(ptnames[numptnames],tempname);
				numptnames++;
			}
			// read svname	
			cind++;
			cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2] = ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			found = 0;
			for (i=0;i<numsvnames;i++) {
				if (strcmp(tempname,svnames[i])==0) found = 1;
			}
			if(found==0) {
				strcpy(svnames[numsvnames],tempname);
				numsvnames++;
			}	
			// read fgname
			cind++;
			cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2] = ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			found = 0;
			for (i=0;i<numfgnames;i++) {
				if (strcmp(tempname,fgnames[i])==0) found = 1;
			}
			if(found==0) {
				strcpy(fgnames[numfgnames],tempname);
				numfgnames++;
			}
		}
	}
	fclose(fptr);
	for (i=0;i<numptnames;i++) {
		printf("%d %s\n", i, ptnames[i]);
	}
	for (i=0;i<numsvnames;i++) {
		printf("%d %s\n", i, svnames[i]);
	}
	for (i=0;i<numfgnames;i++) {
		printf("%d %s\n", i, fgnames[i]);
	}
	
	// 4. read testing file output spaces and numbers only
	
	fptr = fopen("../kddcup.testdata.corrected_10_percent.txt","rt");
	fptr2 = fopen("../processed/testdata4.txt","wt");
	while (!feof(fptr)) {
		fgets(ibuffer,512,fptr);
		if (strlen(ibuffer)>3) {
			// read duration
			cind = 0; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			continuous[0] = atof(tempname);
			// read pt
			cind++; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numptnames;i++) {
				if (strcmp(ptnames[i],tempname)==0) nameind = i;
			}
			symbolics[1] = nameind;
			// read sv
			cind++; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numsvnames;i++) {
				if (strcmp(svnames[i],tempname)==0) nameind = i;
			}
			symbolics[2] = nameind;
			// read fg
			cind++; cind2 = 0;
			while (ibuffer[cind]!=',') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numfgnames;i++) {
				if (strcmp(fgnames[i],tempname)==0) nameind = i;
			}
			symbolics[3] = nameind;
			// read remaining dimensions
			for (i=4;i<numdimensions;i++) {
				cind++; cind2 = 0;
				while (ibuffer[cind]!=',') {
					tempname[cind2]=ibuffer[cind];
					cind++; cind2++;
				}
				tempname[cind2]='\0';
				if (dimensionsymbolflags[i]==0) {
					continuous[i] = atof(tempname);
				} else if (dimensionsymbolflags[i]==1) {
					symbolics[i] = atoi(tempname);
				}
			}
			// read class
			cind++; cind2 = 0;
			while (ibuffer[cind]!='.') {
				tempname[cind2]=ibuffer[cind];
				cind++; cind2++;
			}
			tempname[cind2]='\0';
			for (i=0;i<numclasses;i++) {
				if (strcmp(classnames[i],tempname)==0) nameind = classnums[i];
			}
			// output
			for (i=0;i<numdimensions;i++) {
				if (dimensionsymbolflags[i]==0) {
					fprintf(fptr2, "%f ", continuous[i]);
				} else if (dimensionsymbolflags[i]==1) {
					fprintf(fptr2, "%d ", symbolics[i]);
				}
			}
			fprintf(fptr2,"%d\n",nameind);
		}
	}
	fclose(fptr);
	fclose(fptr2);

	return 0;

}
