//backprop.cpp
//Implements back-propagation algorithm. 2 inputs, 4 outputs, variable hidden layers/nodes.
//Bryan Topp <betopp@cs.unm.edu>

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <vector>
#include <assert.h>
#include <string>
#include <sstream>
#include <algorithm> //shuffle

//Upper bound on the number of training epochs main() will run.
#define MAXEPOCHS 10000
//Width and height (in samples) of the decision-boundary grid and of each dumped video frame.
#define DUMPFRAMEDIM 512
//DUMPFRAMEDIM as a string literal, for splicing into the YUV4MPEG header. Keep in sync with DUMPFRAMEDIM.
#define DUMPFRAMEDIM_s "512"

//Learning rate; set by main() from the first command-line argument.
double eta;
//Number of hidden layers; set by main() from the third command-line argument.
int num_hidden_layers;
//Factor applied to the previous weight step before the new step is added; set by main() from the second command-line argument.
double momentum_scale;

//Returns the next rand() draw scaled into the closed interval [0, 1].
double rand01()
{
	const double draw = static_cast<double>(rand());
	const double largest = static_cast<double>(RAND_MAX);
	return draw / largest;
}

//Logistic activation function: maps any real input into the range (0, 1).
double sigmoid(double input)
{
	const double decay = exp(-input);
	const double denominator = 1.0 + decay;
	return 1.0 / denominator;
}
//Derivative of sigmoid() with respect to its input, evaluated at input.
//Uses the identity s'(x) = s(x) * (1 - s(x)); the sigmoid is evaluated only once
//because this function sits on the innermost loop of back-propagation.
double sigmoidprime(double input)
{
	const double s = sigmoid(input);
	return s * (1.0 - s);
}

//Turns a class number into a set of desired neural outputs.
//c is a 1-based class label in the range 1..4. The result points at a static
//array of 4 doubles in which only element (c - 1) is 1.0 and the rest are 0.0.
//An out-of-range c trips the asserts in debug builds; when asserts are compiled
//out, NULL is returned instead of running off the end of the function.
const double *desired_output_for_class(int c)
{
	static const double one_hot[4][4] =
	{
		{1.0, 0.0, 0.0, 0.0},
		{0.0, 1.0, 0.0, 0.0},
		{0.0, 0.0, 1.0, 0.0},
		{0.0, 0.0, 0.0, 1.0}
	};

	assert(c >= 1);
	assert(c <= 4);

	if(c < 1 || c > 4)
		return NULL;

	return one_hot[c - 1];
}

//A single node of the network together with the weights on its incoming connections.
class Neuron
{
public:
	//One weight per neuron of the previous layer, indexed like that layer's Neurons vector.
	std::vector<double> IncomingWeights;
	//The most recent step applied to each incoming weight; scaled by momentum_scale and reused on the next update.
	std::vector<double> IncomingWeightsMomentum;
	//Bias term that seeds the weighted sum in forward propagation.
	double BiasWeight;
	//Weighted input sum (bias plus weighted previous-layer outputs) before the sigmoid is applied.
	double v;
	double y; //the output of this neuron (forward-prop)
	double partial_deriv; //the effect on the error for exciting this neuron (back-prop)

	//Creates a neuron with numweights incoming weights.
	Neuron(unsigned int numweights);
};

//One layer of neurons, linked to its neighbours so the network can be walked in either direction.
class Layer
{
public:
	std::vector<Neuron> Neurons;
	//Layer feeding this one; NULL marks the input layer. Not owned.
	Layer *PreviousLayer;
	//Layer fed by this one; NULL marks the output layer. Not owned.
	Layer *NextLayer;

	//Creates numneurons neurons, each with weights_per_neuron incoming weights.
	Layer(unsigned int numneurons, unsigned int weights_per_neuron);
};


//Creates a neuron with numweights incoming connections.
//Each weight is the difference of two rand01() draws, so it lies in [-1, 1].
//Momentum terms, the bias weight and the forward/backward state all start at zero,
//so copying a freshly built neuron never reads an indeterminate value.
Neuron::Neuron(unsigned int numweights)
	: BiasWeight(0.0), v(0.0), y(0.0), partial_deriv(0.0)
{
	IncomingWeights.reserve(numweights);
	IncomingWeightsMomentum.reserve(numweights);

	for(unsigned int w = 0; w < numweights; w++)
	{
		//Draw in two separate statements: within a single expression the order of the
		//two rand01() calls is unspecified, which made the weights compiler-dependent.
		const double first = rand01();
		const double second = rand01();

		IncomingWeights.push_back(first - second);
		IncomingWeightsMomentum.push_back(0);
	}
}

//Creates a layer of numneurons neurons, each with weights_per_neuron incoming weights.
//The neighbour links start out as NULL so that an unlinked layer terminates any
//traversal; the caller wires PreviousLayer/NextLayer once the layers have their
//final addresses.
Layer::Layer(unsigned int numneurons, unsigned int weights_per_neuron)
	: PreviousLayer(NULL), NextLayer(NULL)
{
	Neurons.reserve(numneurons);
	for(unsigned int n = 0; n < numneurons; n++)
	{
		Neurons.push_back(Neuron(weights_per_neuron));
	}
}

//One labelled sample: a 2-D input point and the class it belongs to.
class InputPoint
{
public:
	//The two input values fed to the network's two input neurons.
	double x[2];
	//Class label of the point; LoadData only accepts values 1 through 4.
	int desired;

};

//Samples used to adjust the weights; filled by main() from TrainingData.txt.
std::vector<InputPoint> TrainingData;
//Samples used only to measure error after each epoch; filled by main() from TestingData.txt.
std::vector<InputPoint> TestingData;

//Appends every record found in the text file filename to points.
//Each record is four whitespace-separated fields: class label (1..4), an index
//that is parsed but otherwise ignored, then the two input coordinates.
//Reading stops at the first record that does not parse as four fields.
//If the file cannot be opened, or a record carries a class outside 1..4, a message
//is printed to stderr and the program exits with a failure status. These checks
//are explicit (not asserts) so that they also hold in NDEBUG builds.
void LoadData(const char *filename, std::vector<InputPoint> &points)
{
	FILE *inf = fopen(filename, "r");
	if(inf == NULL)
	{
		perror(filename);
		exit(EXIT_FAILURE);
	}

	int desired, index;
	double x1, x2;
	while(fscanf(inf, " %d %d %lf %lf ", &desired, &index, &x1, &x2) == 4)
	{
		if(desired < 1 || desired > 4)
		{
			fprintf(stderr, "%s: class %d is outside the supported range 1-4\n", filename, desired);
			fclose(inf);
			exit(EXIT_FAILURE);
		}

		InputPoint newpoint;
		newpoint.x[0] = x1;
		newpoint.x[1] = x2;
		newpoint.desired = desired;

		points.push_back(newpoint);
	}

	//size() is a size_t; convert explicitly so the argument really matches %lu.
	printf("Read %lu points from %s\n", static_cast<unsigned long>(points.size()), filename);

	fclose(inf);
}

//Feeds input layer and propagates through all subsequent layers.
void ForwardPropagate(Layer &InputLayer, float x1, float x2)
{
	//Set up the input nodes with the correct data.
	InputLayer.Neurons[0].y = InputLayer.Neurons[0].v = x1;
	InputLayer.Neurons[1].y = InputLayer.Neurons[1].v = x2;
	
	//Forward propagate through all layers
	Layer *TargetLayer = InputLayer.NextLayer;
	while(TargetLayer != NULL)
	{
		//Consider each neuron in the receiving layer
		for(unsigned int target = 0; target < TargetLayer->Neurons.size(); target++)
		{
			Neuron *t = &(TargetLayer->Neurons[target]);
			
			//Initialize activation with bias
			t->v = t->BiasWeight;
			
			//Accumulate incoming weights
			assert(t->IncomingWeights.size() == TargetLayer->PreviousLayer->Neurons.size());
			for(unsigned int exciter = 0; exciter < t->IncomingWeights.size(); exciter++)
			{
				t->v += t->IncomingWeights[exciter] * TargetLayer->PreviousLayer->Neurons[exciter].y;
			}
			
			//Calculate output
			t->y = sigmoid(t->v);
		}
		
		if(TargetLayer->NextLayer != NULL)
			assert(TargetLayer->NextLayer->PreviousLayer == TargetLayer);
		
		TargetLayer = TargetLayer->NextLayer;
		
	}	
	
}

int main(int argc, const char **argv)
{
	if(argc < 8)
	{
		printf("usage: %s <eta> <momentum> <# hidden layers> <dump yuv? 0/1> <random seed> <shuffle training 0/1> <hidden node counts...>\n", argv[0]);
		exit(-1);
	}
	
	eta = atof(argv[1]);
	momentum_scale = atof(argv[2]);
	num_hidden_layers = atoi(argv[3]);
	
	int should_dump = atoi(argv[4]);
	int rseed = atoi(argv[5]);
	int shuffle_training = atoi(argv[6]);
	
	printf("Using learning rate (eta) of %lf.\n", eta);
	printf("Using momentum %lf.\n", momentum_scale);
	printf("Using %d hidden layers.\n", num_hidden_layers);
	
	FILE *dumpfile = NULL;
	if(should_dump)
	{
		printf("Dumping YUV4MPEG grid animation.\n");
		dumpfile = fopen("boundary.yuv4mpeg", "wb");
		assert(dumpfile);
		
		fprintf(dumpfile, "YUV4MPEG2 W"DUMPFRAMEDIM_s" H"DUMPFRAMEDIM_s" F30 Ip A1:1 C444\n");
	}
	else
	{
		printf("Not dumping grid animation data.\n");
	}
	
	
	printf("Using random seed %d.\n", rseed);
	srand(rseed);
	
	if(shuffle_training)
		printf("Shuffling training data.\n");
	else
		printf("Presenting training in-order.\n");
	
	
	printf("Hidden layer node configuration:\n");
	std::vector<int> hidden_layer_node_counts;
	for(int a = 7; a < argc; a++)
	{
		printf("\t%d\n", atoi(argv[a]));
		hidden_layer_node_counts.push_back(atoi(argv[a]));
	}
	assert(hidden_layer_node_counts.size() == num_hidden_layers);
	
	
	//Open file to dump generalization plots.
	std::ostringstream filename;
	filename << "gen/gen_eta" << eta << "_mom" << momentum_scale << "_rseed" << rseed << "_shuf" << shuffle_training << "_";
	for(unsigned int a = 0; a < hidden_layer_node_counts.size(); a++)
	{
		if(a != 0)
			filename << "-";
		
		filename  << hidden_layer_node_counts[a];
	}
	
	printf("Printing generalization plot to %s\n", filename.str().c_str());
	FILE *genf = fopen(filename.str().c_str(), "w");
	assert(genf);
	
	LoadData("TrainingData.txt", TrainingData);
	LoadData("TestingData.txt", TestingData);
	
	assert(num_hidden_layers > 0);
	assert(eta > 0.0);
	assert(momentum_scale >= 0.0);
	
	Layer InputLayer(2, 0);
	InputLayer.PreviousLayer = NULL;
	
	//Build hidden layers
	std::vector<Layer> HiddenLayers;
	for(unsigned int hl = 0; hl < num_hidden_layers; hl++)
	{
		if(hl == 0) //first hidden layer has 2 inputs per node, from the input layer
		{
			HiddenLayers.push_back(Layer(hidden_layer_node_counts[hl], 2));
			HiddenLayers[hl].PreviousLayer = &InputLayer;
			InputLayer.NextLayer = &(HiddenLayers[hl]);
		}
		else
		{
			HiddenLayers.push_back(Layer(hidden_layer_node_counts[hl], hidden_layer_node_counts[hl-1]));
			HiddenLayers[hl].PreviousLayer = &(HiddenLayers[hl-1]);
			HiddenLayers[hl-1].NextLayer = &(HiddenLayers[hl]);
		}
		
	}

	//Make pointers - AFTER the dynamic vector has resized itself as necessary.
	for(unsigned int hl = 0; hl < num_hidden_layers; hl++)
	{
		if(hl == 0)
		{
			HiddenLayers[hl].PreviousLayer = &InputLayer;
			InputLayer.NextLayer = &(HiddenLayers[hl]);
		}
		else
		{
			HiddenLayers[hl].PreviousLayer = &(HiddenLayers[hl-1]);
			HiddenLayers[hl-1].NextLayer = &(HiddenLayers[hl]);
		}
		
	}	
	
	Layer OutputLayer(4, hidden_layer_node_counts[hidden_layer_node_counts.size()-1]);
	OutputLayer.PreviousLayer = &(HiddenLayers[HiddenLayers.size()-1]);
	HiddenLayers[HiddenLayers.size()-1].NextLayer = &OutputLayer;
	OutputLayer.NextLayer = NULL;
	
	
	assert(InputLayer.Neurons.size() == 2);
	assert(OutputLayer.Neurons.size() == 4);
	assert(HiddenLayers.size() == num_hidden_layers);
	assert(HiddenLayers[0].Neurons.size() == hidden_layer_node_counts[0]);
	
	
	double last_training_error = 1000.0;
	int epochs_since_significant_improvement = 0;
	
	
	int epoch;
	for(epoch = 0; epoch < MAXEPOCHS; epoch++)
	{
		if(shuffle_training)
			std::random_shuffle ( TrainingData.begin(), TrainingData.end() );
		
		
		double accumulated_squared_error = 0.0;
		
		//Present each training data point and train the network.
		int presentation;
		for(presentation = 0; presentation < TrainingData.size(); presentation++)
		{
			//Do forward-propagation
			ForwardPropagate(InputLayer, TrainingData[presentation].x[0], TrainingData[presentation].x[1]);
			
			//Ready desired outputs based on training data class.
			const double *desired_outputs = desired_output_for_class(TrainingData[presentation].desired);
			
			//Calculate errors
			double output_errors[4];
			for(unsigned int o = 0; o < 4; o++)
			{
				output_errors[o] = desired_outputs[o] - OutputLayer.Neurons[o].y;
				accumulated_squared_error += (output_errors[o] * output_errors[o]) / 4.0;
			}
			
			//Calculate partial derivatives for output, update weights
			for(unsigned int o = 0; o < 4; o++)
			{
				OutputLayer.Neurons[o].partial_deriv = output_errors[o] * sigmoidprime(OutputLayer.Neurons[o].v);
			}
					
			//Backward propagate 
			Layer *TargetLayer = &OutputLayer;
			//For each layer, we'll adjust weights and find derivatives for the previous layer
			while(TargetLayer->PreviousLayer != NULL)
			{
				//Zero all the partial derivatives on the previous layer
				for(unsigned int exciter = 0; exciter < TargetLayer->PreviousLayer->Neurons.size(); exciter++)
				{
					TargetLayer->PreviousLayer->Neurons[exciter].partial_deriv = 0.0;
				}
				
				//Loop through all neurons in the target layer
				//For each one, loop through all its weights (i.e., all the previous layer neurons).
				//Adjust weights based on excitation and current partial.
				//Accumulate new partials for the "previous" layer.
				for(unsigned int target = 0; target < TargetLayer->Neurons.size(); target++)
				{
					Neuron *t = &(TargetLayer->Neurons[target]);
					
					assert(t->IncomingWeights.size() == TargetLayer->PreviousLayer->Neurons.size());
					for(unsigned int exciter = 0; exciter < t->IncomingWeights.size(); exciter++)
					{
						Neuron *e = &(TargetLayer->PreviousLayer->Neurons[exciter]);
						
						//calculate this target's contribution to the exciter's partial
						e->partial_deriv += sigmoidprime(e->v) * t->IncomingWeights[exciter] * t->partial_deriv;
						
						//adjust the weight based on how strongly it was active, and which way it needs to go
						t->IncomingWeightsMomentum[exciter] *= momentum_scale;
						t->IncomingWeightsMomentum[exciter] += e->y * t->partial_deriv * eta;
						t->IncomingWeights[exciter] += t->IncomingWeightsMomentum[exciter];
						

					}
					
					//Do the bias weight for this target as well.
					t->BiasWeight += t->partial_deriv * eta;
				}
				
				
				TargetLayer = TargetLayer->PreviousLayer;
			}
			
		}
		
		//Reset error measure
		double training_mse = sqrt(accumulated_squared_error / (double)presentation);
		accumulated_squared_error = 0;
		
		//Testing
		for(presentation = 0; presentation < TestingData.size(); presentation++)
		{
			//Forward propagation on test data
			ForwardPropagate(InputLayer, TestingData[presentation].x[0], TestingData[presentation].x[1]);
			
			//Ready desired outputs based on training data class.
			const double *desired_outputs = desired_output_for_class(TestingData[presentation].desired);
	
			//Calculate errors
			double output_errors[4];
			for(unsigned int o = 0; o < 4; o++)
			{
				output_errors[o] = desired_outputs[o] - OutputLayer.Neurons[o].y;
				accumulated_squared_error += (output_errors[o] * output_errors[o]) / 4.0;
			}
		}
		double testing_mse = sqrt(accumulated_squared_error / (double)presentation);

		printf("Epoch %d trained, found %lf rms training, %lf rms testing.\n", epoch, training_mse, testing_mse);
		fprintf(genf, "%d %lf %lf\n", epoch, training_mse, testing_mse);
		
		//Dump grid if requested
		if(should_dump)
		{
			fprintf(dumpfile, "FRAME\n");
			
			unsigned char y[DUMPFRAMEDIM][DUMPFRAMEDIM];
			char cb[DUMPFRAMEDIM][DUMPFRAMEDIM];
			char cr[DUMPFRAMEDIM][DUMPFRAMEDIM];
			
			for(int yp = 0; yp < DUMPFRAMEDIM; yp++)
			{
				for(int xp = 0; xp < DUMPFRAMEDIM; xp++)
				{
					double gridx = (((double)xp / (double)DUMPFRAMEDIM) * 2.5) - 0.5;
					double gridy = (((double)yp / (double)DUMPFRAMEDIM) * 2.5) - 0.5;
					ForwardPropagate(InputLayer, gridx, gridy);
					
					unsigned char ly = 200;
					char lcb;
					char lcr;
					
					if(OutputLayer.Neurons[0].y >= OutputLayer.Neurons[1].y && 
						OutputLayer.Neurons[0].y >= OutputLayer.Neurons[2].y && 
						OutputLayer.Neurons[0].y >= OutputLayer.Neurons[3].y)
					{
						lcb = 255;
						lcr = 255;
					}
					else if(OutputLayer.Neurons[1].y >= OutputLayer.Neurons[0].y && 
						OutputLayer.Neurons[1].y >= OutputLayer.Neurons[2].y && 
						OutputLayer.Neurons[1].y >= OutputLayer.Neurons[3].y)
					{
						lcb = 0;
						lcr = 255;
					}
					else if(OutputLayer.Neurons[2].y >= OutputLayer.Neurons[1].y && 
						OutputLayer.Neurons[2].y >= OutputLayer.Neurons[0].y && 
						OutputLayer.Neurons[2].y >= OutputLayer.Neurons[3].y)
					{
						lcb = 255;
						lcr = 0;
					}
					else
					{
						lcb = 0;
						lcr = 0;
					}
					
					y[yp][xp] = ly;
					cb[yp][xp] = lcb;
					cr[yp][xp] = lcr;
				}
			}
			
			fwrite(y, 1, DUMPFRAMEDIM*DUMPFRAMEDIM, dumpfile);
			fwrite(cb, 1, DUMPFRAMEDIM*DUMPFRAMEDIM, dumpfile);
			fwrite(cr, 1, DUMPFRAMEDIM*DUMPFRAMEDIM, dumpfile);
		}
		
		//Abort if we haven't moved significantly		
		if(training_mse < last_training_error * 0.9999)
		{
			epochs_since_significant_improvement = 0;
			last_training_error = training_mse;
		}
		else
			epochs_since_significant_improvement++;
		
		if(epochs_since_significant_improvement > 100)
			break;
	}
	fclose(genf);
	if(dumpfile)
		fclose(dumpfile);
	
	//print a grid of the final decision boundary
	filename.seekp(0);
	filename << "bound/bound_eta"<< eta << "_mom"<< momentum_scale << "_rseed"<< rseed << "_shuf"<< shuffle_training <<"_";
	for(unsigned int a = 0; a < hidden_layer_node_counts.size(); a++)
	{
		if(a != 0)
			filename << "-";
		filename  << hidden_layer_node_counts[a];
	}
	
	printf("Printing final decision plot to %s\n", filename.str().c_str());
	FILE *boundf = fopen(filename.str().c_str(), "w");
	assert(boundf);
	for(int yp = 0; yp < DUMPFRAMEDIM; yp++)
	{
		for(int xp = 0; xp < DUMPFRAMEDIM; xp++)
		{
			double gridx = (((double)xp / (double)DUMPFRAMEDIM) * 2.5) - 0.5;
			double gridy = (((double)yp / (double)DUMPFRAMEDIM) * 2.5) - 0.5;
			ForwardPropagate(InputLayer, gridx, gridy);
			
			if(OutputLayer.Neurons[0].y >= OutputLayer.Neurons[1].y && 
				OutputLayer.Neurons[0].y >= OutputLayer.Neurons[2].y && 
				OutputLayer.Neurons[0].y >= OutputLayer.Neurons[3].y)
			{
				fprintf(boundf, "%lf %lf %d\n", gridx, gridy, 1);
			}
			else if(OutputLayer.Neurons[1].y >= OutputLayer.Neurons[0].y && 
				OutputLayer.Neurons[1].y >= OutputLayer.Neurons[2].y && 
				OutputLayer.Neurons[1].y >= OutputLayer.Neurons[3].y)
			{
				fprintf(boundf, "%lf %lf %d\n", gridx, gridy, 2);
			}
			else if(OutputLayer.Neurons[2].y >= OutputLayer.Neurons[1].y && 
				OutputLayer.Neurons[2].y >= OutputLayer.Neurons[0].y && 
				OutputLayer.Neurons[2].y >= OutputLayer.Neurons[3].y)
			{
				fprintf(boundf, "%lf %lf %d\n", gridx, gridy, 3);
			}
			else
			{
				fprintf(boundf, "%lf %lf %d\n", gridx, gridy, 4);
			}
		}
	}
	fclose(boundf);
}