initial github commit
Implementation of backprop in C using Grand Central Dispatch and Blocks
This commit is contained in:
753
nn.c
Normal file
753
nn.c
Normal file
@@ -0,0 +1,753 @@
|
||||
/*
|
||||
Author: James Griffin-Allwood
|
||||
Date: March 13 2014
|
||||
|
||||
Description: Implementation of the learning system and model
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
#include <math.h>
|
||||
#include <Block.h>
|
||||
#include <dispatch/dispatch.h>
|
||||
#include "process.h"
|
||||
#include "nn.h"
|
||||
|
||||
int free_matrix(matrix * to_free) {
|
||||
if (to_free != NULL) {
|
||||
if (to_free->weight_matrix != NULL) {
|
||||
for (int i = 0; i < to_free->rows; i++) {
|
||||
free(to_free->weight_matrix[i]);
|
||||
}
|
||||
free(to_free->weight_matrix);
|
||||
}
|
||||
|
||||
free(to_free);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int free_model(nn_model * to_free) {
|
||||
if (to_free != NULL) {
|
||||
if (to_free->nodes_per_layer != NULL) {
|
||||
free(to_free->nodes_per_layer);
|
||||
}
|
||||
|
||||
if (to_free->layer_weights != NULL) {
|
||||
for (int i = 0; i < to_free->layers; i++) {
|
||||
free_matrix(to_free->layer_weights[i]);
|
||||
}
|
||||
}
|
||||
if (to_free->previous_weight_updates != NULL) {
|
||||
for (int i = 0; i < to_free->layers; i++) {
|
||||
free_matrix(to_free->previous_weight_updates[i]);
|
||||
}
|
||||
}
|
||||
reset_model_vectors(to_free);
|
||||
free(to_free);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
matrix * create_matrix(int r, int c) {
|
||||
matrix * new;
|
||||
|
||||
if ((new = calloc(1, sizeof(matrix))) == NULL) {
|
||||
fprintf(stderr, "Unable to run the activation function\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((new->weight_matrix = calloc(r, sizeof(double *))) == NULL) {
|
||||
fprintf(stderr, "Unable to run the activation function\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int i = 0; i < r; i++) {
|
||||
if ((new->weight_matrix[i] = calloc(c, sizeof(double))) == NULL) {
|
||||
fprintf(stderr, "Unable to run the activation function\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
new->rows = r;
|
||||
new->columns = c;
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
struct nn_model * create_model(const double rate, const int layers, const int layer_nodes[], const int outputs) {
|
||||
nn_model * new_model;
|
||||
matrix ** layer_weights;
|
||||
matrix ** layer_inputs;
|
||||
matrix ** layer_outputs;
|
||||
|
||||
if ((new_model = calloc(1, sizeof(nn_model))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
new_model->previous_weight_updates = NULL;
|
||||
new_model->momentum = DEFAULT_MOMENTUM;
|
||||
new_model->learning_rate = rate;
|
||||
new_model->layers = layers;
|
||||
new_model->outputs = outputs;
|
||||
|
||||
if ((new_model->nodes_per_layer = calloc(layers, sizeof(int))) == NULL) {
|
||||
fprintf(stderr, "Unable to create model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int i = 0; i < new_model->layers; i++) {
|
||||
new_model->nodes_per_layer[i] = layer_nodes[i];
|
||||
}
|
||||
|
||||
if ((layer_weights = calloc(layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((layer_inputs = calloc(layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((layer_outputs = calloc(layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// initialize the input and output vector arrays to NULL
|
||||
for (int i = 0; i < layers; i++) {
|
||||
layer_inputs[i] = NULL;
|
||||
layer_outputs[i] = NULL;
|
||||
}
|
||||
|
||||
// Create the connection weight matricies between the layers (except last hidden layer and outputs)
|
||||
for (int i = 0; i < (layers - 1); i++) {
|
||||
if ((layer_weights[i] = create_matrix(layer_nodes[i], layer_nodes[i + 1])) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Create connection weight matrix between last hidden layer and output
|
||||
if ((layer_weights[layers - 1] = create_matrix(layer_nodes[layers - 1], outputs)) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Initialize all weights in the network to random values between -0.5 and 0.5
|
||||
for (int i = 0; i < new_model->layers; i++) {
|
||||
if (i == (new_model->layers - 1)) {
|
||||
for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
|
||||
for (int k = 0; k < new_model->outputs; k++) {
|
||||
layer_weights[i]->weight_matrix[j][k]
|
||||
= ((double)(arc4random_uniform(100) / 100.0) - 0.5);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
|
||||
for (int k = 0; k < new_model->nodes_per_layer[i + 1]; k++) {
|
||||
layer_weights[i]->weight_matrix[j][k]
|
||||
= ((double)(arc4random_uniform(100) / 100.0) - 0.5);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_model->layer_weights = layer_weights;
|
||||
new_model->layer_input_vectors = layer_inputs;
|
||||
new_model->layer_output_vectors = layer_outputs;
|
||||
new_model->output = NULL;
|
||||
|
||||
new_model->previous_weight_updates = NULL;
|
||||
|
||||
return new_model;
|
||||
}
|
||||
|
||||
struct nn_model * copy_model(const struct nn_model * model) {
|
||||
nn_model * new_model;
|
||||
|
||||
if ((new_model = calloc(1, sizeof(nn_model))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
new_model->momentum = model->momentum;
|
||||
new_model->learning_rate = model->learning_rate;
|
||||
new_model->layers = model->layers;
|
||||
new_model->outputs = model->outputs;
|
||||
|
||||
new_model->previous_weight_updates = NULL;
|
||||
|
||||
if ((new_model->nodes_per_layer = calloc(new_model->layers, sizeof(int))) == NULL) {
|
||||
fprintf(stderr, "Unable to copy model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int i = 0; i < new_model->layers; i++) {
|
||||
new_model->nodes_per_layer[i] = model->nodes_per_layer[i];
|
||||
}
|
||||
|
||||
if ((new_model->layer_weights = calloc(new_model->layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((new_model->layer_input_vectors = calloc(new_model->layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((new_model->layer_output_vectors = calloc(new_model->layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// initialize the input and output vector arrays to NULL
|
||||
for (int i = 0; i < new_model->layers; i++) {
|
||||
new_model->layer_input_vectors[i] = NULL;
|
||||
new_model->layer_output_vectors[i] = NULL;
|
||||
}
|
||||
|
||||
if ((new_model->output = calloc(1, sizeof(matrix))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Create the connection weight matricies between the layers (except last hidden layer and outputs)
|
||||
for (int i = 0; i < (new_model->layers - 1); i++) {
|
||||
if ((new_model->layer_weights[i]
|
||||
= create_matrix(model->layer_weights[i]->rows, model->layer_weights[i]->columns)) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Create connection weight, input and output matrix between last hidden layer and output
|
||||
if ((new_model->layer_weights[new_model->layers - 1] =
|
||||
create_matrix(model->nodes_per_layer[new_model->layers - 1], new_model->outputs)) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// copy all the network weights
|
||||
for (int i = 0; i < new_model->layers; i++) {
|
||||
if (i == (new_model->layers - 1)) {
|
||||
for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
|
||||
for (int k = 0; k < new_model->outputs; k++) {
|
||||
new_model->layer_weights[i]->weight_matrix[j][k]
|
||||
= model->layer_weights[i]->weight_matrix[j][k];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
|
||||
for (int k = 0; k < new_model->nodes_per_layer[i + 1]; k++) {
|
||||
new_model->layer_weights[i]->weight_matrix[j][k]
|
||||
= model->layer_weights[i]->weight_matrix[j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new_model;
|
||||
}
|
||||
|
||||
int save_model(const nn_model * m, const char * file) {
|
||||
FILE * out;
|
||||
|
||||
if ((out= fopen(file, "w")) == NULL) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
fprintf(out, "%lf\t", m->learning_rate);
|
||||
fprintf(out, "%lf\t", m->momentum);
|
||||
fprintf(out, "%d\t", m->layers);
|
||||
fprintf(out, "%d\t\n", m->outputs);
|
||||
for (int i = 0; i < m->layers; i++) {
|
||||
fprintf(out, "%d\t", m->nodes_per_layer[i]);
|
||||
}
|
||||
for (int i = 0; i < m->layers; i++) {
|
||||
for (int j = 0; j < m->layer_weights[i]->rows; j++) {
|
||||
for (int k = 0; k < m->layer_weights[i]->columns; k++) {
|
||||
fprintf(out, "%lf\t", m->layer_weights[i]->weight_matrix[j][k]);
|
||||
}
|
||||
fprintf(out, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (fclose(out) == EOF) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct nn_model * load_model(char * file) {
|
||||
FILE * temp;
|
||||
nn_model * loaded;
|
||||
|
||||
if ((temp = fopen(file, "r")) == NULL) {
|
||||
fprintf(stderr, "Unable to load model from %s", file);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((loaded = calloc(1, sizeof(nn_model))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fscanf(temp, "%lf\t", &loaded->learning_rate);
|
||||
fscanf(temp, "%lf\t", &loaded->momentum);
|
||||
fscanf(temp, "%d\t", &loaded->layers);
|
||||
fscanf(temp, "%d\t\n", &loaded->outputs);
|
||||
|
||||
if ((loaded->nodes_per_layer = calloc(loaded->layers, sizeof(int))) == NULL) {
|
||||
fprintf(stderr, "Unable to copy model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int i = 0; i < loaded->layers; i++) {
|
||||
fscanf(temp, "%d\t", &loaded->nodes_per_layer[i]);
|
||||
}
|
||||
|
||||
if ((loaded->layer_weights = calloc(loaded->layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int i = 0; i < loaded->layers; i++) {
|
||||
int columns = 0;
|
||||
int rows = loaded->nodes_per_layer[i];
|
||||
if (i != loaded->layers) {
|
||||
columns = loaded->nodes_per_layer[i + 1];
|
||||
} else {
|
||||
columns = loaded->outputs;
|
||||
}
|
||||
loaded->layer_weights[i] = create_matrix(rows, columns);
|
||||
for (int j = 0; j < loaded->layer_weights[i]->rows; j++) {
|
||||
for (int k = 0; k < loaded->layer_weights[i]->columns; k++) {
|
||||
fscanf(temp, "%lf\t", &loaded->layer_weights[i]->weight_matrix[j][k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((loaded->layer_input_vectors = calloc(loaded->layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((loaded->layer_output_vectors = calloc(loaded->layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// initialize the input and output vector arrays to NULL
|
||||
for (int i = 0; i < loaded->layers; i++) {
|
||||
loaded->layer_input_vectors[i] = NULL;
|
||||
loaded->layer_output_vectors[i] = NULL;
|
||||
}
|
||||
|
||||
if ((loaded->output = calloc(1, sizeof(matrix))) == NULL) {
|
||||
fprintf(stderr, "Not enough memory for model\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
loaded->output = NULL;
|
||||
loaded->previous_weight_updates = NULL;
|
||||
|
||||
return loaded;
|
||||
}
|
||||
|
||||
matrix * multiply_matricies(const matrix * a, const matrix * b) {
|
||||
matrix * result;
|
||||
|
||||
if (a->columns != b->rows) {
|
||||
fprintf(stderr, "Unable to multiply these matricies\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((result = create_matrix(a->rows, b->columns)) == NULL) {
|
||||
fprintf(stderr, "Unable to allocated memory formatrix result\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int i = 0; i < result->rows; i++) {
|
||||
for (int j = 0; j < result->columns; j++) {
|
||||
double value = 0.0;
|
||||
for (int k = 0; k < a->columns; k++) {
|
||||
value += a->weight_matrix[i][k] * b->weight_matrix[k][j];
|
||||
}
|
||||
result->weight_matrix[i][j] = value;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int add_matricies(matrix * a, const matrix * b) {
|
||||
if (a->rows != b->rows || a->columns != b->columns) {
|
||||
fprintf(stderr, "Unable to add matricies\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < a->rows; i++) {
|
||||
for (int j = 0; j < a->columns; j++) {
|
||||
a->weight_matrix[i][j] = a->weight_matrix[i][j] + b->weight_matrix[i][j];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
matrix * activation_function(const matrix * input_vector) {
|
||||
matrix * output_vector;
|
||||
|
||||
if (input_vector->rows != 1) {
|
||||
fprintf(stderr, "input vectors for a layer must be nx1\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((output_vector = create_matrix(input_vector->rows, input_vector->columns)) == NULL) {
|
||||
fprintf(stderr, "Unable to run the activation function\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int i = 0; i < input_vector->columns; i++) {
|
||||
double sigmoid = 1 / (1 + exp(-1 * input_vector->weight_matrix[0][i]));
|
||||
output_vector->weight_matrix[0][i] = sigmoid;
|
||||
}
|
||||
|
||||
return output_vector;
|
||||
}
|
||||
|
||||
int classify_instance(nn_model * current, message * input, const int size) {
|
||||
int layers = current->layers;
|
||||
matrix * input_vector;
|
||||
matrix * output_vector;
|
||||
double bias = 1.0;
|
||||
|
||||
reset_model_vectors(current);
|
||||
|
||||
if ((input_vector = create_matrix(1, size)) == NULL) {
|
||||
fprintf(stderr, "Unable to classify the instance\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < input_vector->columns; i++) {
|
||||
input_vector->weight_matrix[0][i] = input->text_vector[i];
|
||||
}
|
||||
current->layer_output_vectors[0] = input_vector;
|
||||
|
||||
for (int i = 0; i < layers; i++) {
|
||||
// add bias input
|
||||
current->layer_output_vectors[i]->weight_matrix[0][current->layer_output_vectors[i]->columns - 1] = bias;
|
||||
current->layer_input_vectors[i] =
|
||||
multiply_matricies(current->layer_output_vectors[i], current->layer_weights[i]);
|
||||
if (current->layer_input_vectors[i] == NULL) {
|
||||
fprintf(stderr, "Unable to classify\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (i != (layers - 1)) {
|
||||
current->layer_output_vectors[i + 1] = activation_function(current->layer_input_vectors[i]);
|
||||
if (current->layer_output_vectors[i + 1] == NULL) {
|
||||
fprintf(stderr, "Unable to classify\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
output_vector = activation_function(current->layer_input_vectors[layers - 1]);
|
||||
current->output = output_vector;
|
||||
if (output_vector == NULL) {
|
||||
fprintf(stderr, "Unable to classify\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < PRO_CON_OUTPUT; i++) {
|
||||
input->prediction_probability[i] = current->output->weight_matrix[0][i];
|
||||
}
|
||||
|
||||
if (input->prediction_probability[0] > input->prediction_probability[1]) {
|
||||
input->prediction = CON;
|
||||
} else {
|
||||
input->prediction = PRO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int reset_model_vectors(nn_model * m) {
|
||||
free_matrix(m->output);
|
||||
m->output = NULL;
|
||||
for (int i = 0; i < m->layers; i++) {
|
||||
if (m->layer_input_vectors[i] != NULL) {
|
||||
free_matrix(m->layer_input_vectors[i]);
|
||||
m->layer_input_vectors[i] = NULL;
|
||||
}
|
||||
if (m->layer_output_vectors[i] != NULL) {
|
||||
free_matrix(m->layer_output_vectors[i]);
|
||||
m->layer_output_vectors[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int backprop_update(nn_model * update, matrix * output) {
|
||||
matrix * output_error;
|
||||
matrix ** hidden_error;
|
||||
matrix ** weight_updates;
|
||||
int use_momentum = 0;
|
||||
int hidden_layers = update->layers - 1;
|
||||
|
||||
if ((output_error = create_matrix(1, PRO_CON_OUTPUT)) == NULL) {
|
||||
fprintf(stderr, "Unable to allocate memory for error term\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// allocated enough memory for an error matrix for every layer (except input layer)
|
||||
if ((hidden_error = calloc(hidden_layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Unable to allocate memory for hidden errors\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// allocate enough memory for all the weight updates
|
||||
if ((weight_updates = calloc(update->layers, sizeof(matrix *))) == NULL) {
|
||||
fprintf(stderr, "Unable to allocate memory for error updates\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < update->layers; i++) {
|
||||
weight_updates[i] = create_matrix(update->layer_weights[i]->rows, update->layer_weights[i]->columns);
|
||||
if (weight_updates[i] == NULL) {
|
||||
fprintf(stderr, "Unable to store weight updates\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the error outputs
|
||||
for (int i = 0; i < PRO_CON_OUTPUT; i++) {
|
||||
output_error->weight_matrix[0][i] =
|
||||
(update->output->weight_matrix[0][i]
|
||||
* (1 - update->output->weight_matrix[0][i])
|
||||
* (output->weight_matrix[0][i] - update->output->weight_matrix[0][i]));
|
||||
}
|
||||
|
||||
for (int i = hidden_layers; i > 0; i--) {
|
||||
int forward_nodes = 0;
|
||||
matrix * forward_error;
|
||||
if (i == hidden_layers) {
|
||||
forward_nodes = update->outputs;
|
||||
forward_error = output_error;
|
||||
} else {
|
||||
forward_nodes = update->nodes_per_layer[i + 1];
|
||||
forward_error = hidden_error[i];
|
||||
}
|
||||
|
||||
hidden_error[i - 1] = create_matrix(1, update->nodes_per_layer[i]);
|
||||
if (hidden_error[i - 1] == NULL) {
|
||||
fprintf(stderr, "Unable to allocate memory for hidden layer errors\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int j = 0; j < update->nodes_per_layer[i]; j++) {
|
||||
double error_sum = 0.0;
|
||||
for (int k = 0; k < forward_nodes; k++) {
|
||||
|
||||
error_sum +=
|
||||
update->layer_weights[i]->weight_matrix[j][k] * forward_error->weight_matrix[0][k];
|
||||
}
|
||||
|
||||
hidden_error[i - 1]->weight_matrix[0][j] =
|
||||
(update->layer_output_vectors[i]->weight_matrix[0][j]
|
||||
* (1 - update->layer_output_vectors[i]->weight_matrix[0][j])
|
||||
* error_sum);
|
||||
}
|
||||
}
|
||||
|
||||
if (update->previous_weight_updates != NULL) {
|
||||
use_momentum = 1;
|
||||
}
|
||||
|
||||
// Compute Weight Updates
|
||||
for (int i = update->layers; i > 0; i--) {
|
||||
matrix * forward_error;
|
||||
if (i == update->layers) {
|
||||
forward_error = output_error;
|
||||
} else {
|
||||
forward_error = hidden_error[i - 1];
|
||||
}
|
||||
for (int j = 0; j < update->layer_weights[i - 1]->rows; j++) {
|
||||
for (int k = 0; k < update->layer_weights[i - 1]->columns; k++) {
|
||||
double momentum_term = 0.0;
|
||||
if (use_momentum) {
|
||||
momentum_term = update->momentum * update->previous_weight_updates[i - 1]->weight_matrix[j][k];
|
||||
}
|
||||
weight_updates[i - 1]->weight_matrix[j][k] =
|
||||
(update->learning_rate
|
||||
* forward_error->weight_matrix[0][k]
|
||||
* update->layer_output_vectors[i - 1]->weight_matrix[0][j])
|
||||
+ momentum_term;
|
||||
update->layer_weights[i - 1]->weight_matrix[j][k] += weight_updates[i - 1]->weight_matrix[j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_matrix(output_error);
|
||||
|
||||
if (hidden_error != NULL) {
|
||||
for (int i = 0; i < hidden_layers; i++) {
|
||||
free_matrix(hidden_error[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (update->previous_weight_updates != NULL) {
|
||||
for (int i = 0; i < update->layers; i++) {
|
||||
free_matrix(update->previous_weight_updates[i]);
|
||||
}
|
||||
}
|
||||
update->previous_weight_updates = weight_updates;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
nn_model * train_model(nn_model * m, data * training_data, data * test_data, double error_rate, int epoch_max) {
|
||||
int epochs = 0;
|
||||
double test_error_rate = 1;
|
||||
double last_error_rate = 1;
|
||||
int epochs_error_increase = 0;
|
||||
nn_model * best = NULL;
|
||||
|
||||
while (epochs < epoch_max) {
|
||||
matrix * expected_result = create_matrix(1, m->outputs);
|
||||
double correctly_classified_test = 0;
|
||||
double correctly_classified_train = 0;
|
||||
double current_error_rate = 1;
|
||||
double train_error_rate = 1;
|
||||
|
||||
if (epochs_error_increase > EPOCH_MAX_ERROR_INCREASE) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Run all of the instances through the network to train
|
||||
for (int i = 0; i < training_data->count; i++) {
|
||||
classify_instance(m, training_data->instances[i], m->nodes_per_layer[0]);
|
||||
|
||||
if (training_data->instances[i]->class == PRO) {
|
||||
expected_result->weight_matrix[0][0] = 0;
|
||||
expected_result->weight_matrix[0][1] = 1;
|
||||
} else {
|
||||
expected_result->weight_matrix[0][0] = 1;
|
||||
expected_result->weight_matrix[0][1] = 0;
|
||||
}
|
||||
backprop_update(m, expected_result);
|
||||
}
|
||||
free_matrix(expected_result);
|
||||
|
||||
// Compute the training error rate for this epoch
|
||||
for (int i = 0; i < training_data->count; i++) {
|
||||
if (training_data->instances[i]->class == training_data->instances[i]->prediction) {
|
||||
correctly_classified_train++;
|
||||
}
|
||||
}
|
||||
train_error_rate = (1 - (correctly_classified_train / training_data->count));
|
||||
|
||||
// Classify the Test Set and compute error rate for this epoch.
|
||||
classify_dataset(m, test_data);
|
||||
for (int i = 0; i < test_data->count; i++) {
|
||||
if (test_data->instances[i]->class == test_data->instances[i]->prediction) {
|
||||
correctly_classified_test++;
|
||||
}
|
||||
}
|
||||
current_error_rate = (1 - (correctly_classified_test / test_data->count));
|
||||
|
||||
// Check to see if the error rate is a new minimum
|
||||
if (current_error_rate < test_error_rate) {
|
||||
test_error_rate = current_error_rate;
|
||||
fprintf(stdout, "Epoch %3d: New best test error rate found %lf\n", epochs, test_error_rate);
|
||||
free_model(best);
|
||||
best = copy_model(m);
|
||||
epochs_error_increase = 0;
|
||||
} else if (current_error_rate >= last_error_rate) {
|
||||
epochs_error_increase++;
|
||||
} else {
|
||||
epochs_error_increase = 0;
|
||||
}
|
||||
|
||||
// Print out error rates for plotting every 10 epochs
|
||||
if ((epochs % 5) == 0) {
|
||||
fprintf(stdout, "Epoch %3d:\ttrain error:%lf\ttest error:%lf\n",
|
||||
epochs, train_error_rate, current_error_rate);
|
||||
}
|
||||
last_error_rate = current_error_rate;
|
||||
epochs++;
|
||||
}
|
||||
|
||||
// Report the error rate before returning.
|
||||
fprintf(stdout, "Trained model to an test error rate of %lf\n", test_error_rate);
|
||||
free_model(m);
|
||||
return best;
|
||||
}
|
||||
|
||||
int classify_dataset(nn_model * m, data * set) {
|
||||
// Use Grand Central Dispatch and Blocks to multithread this task for performance
|
||||
dispatch_apply(set->count, dispatch_get_global_queue(0, 0), ^ (size_t i) {
|
||||
nn_model * classifier = copy_model(m);
|
||||
classify_instance(classifier, set->instances[i], classifier->nodes_per_layer[0]);
|
||||
free_model(classifier);
|
||||
});
|
||||
|
||||
// int pro = 0;
|
||||
// int con = 0;
|
||||
// for (int i = 0; i < set->count; i++) {
|
||||
// if (set->instances[i]->prediction == PRO) {
|
||||
// pro++;
|
||||
// } else {
|
||||
// con++;
|
||||
// }
|
||||
// }
|
||||
// fprintf(stdout, "Classified %d Pros and %d Cons\n", pro, con);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print_confusion_matrix(data * set) {
|
||||
matrix * confusion_matrix = create_matrix(2, 2);
|
||||
char * class_label[] = { "Con", "Pro" };
|
||||
|
||||
for (int i = 0; i < set->count; i++) {
|
||||
confusion_matrix->weight_matrix[set->instances[i]->class][set->instances[i]->prediction]++;
|
||||
}
|
||||
|
||||
print_matrix(confusion_matrix, class_label);
|
||||
double accuracy
|
||||
= ((confusion_matrix->weight_matrix[0][0] + confusion_matrix->weight_matrix[1][1]) / set->count)
|
||||
* 100;
|
||||
fprintf(stdout, "The model corretly classified %.2lf%% of the instances\n", accuracy);
|
||||
free_matrix(confusion_matrix);
|
||||
}
|
||||
|
||||
void print_matrix(matrix * m, char ** labels) {
|
||||
int labeled = 0;
|
||||
if (labels != NULL) {
|
||||
labeled = 1;
|
||||
}
|
||||
|
||||
if (labeled) {
|
||||
for (int i = 0; i < m->rows; i++) {
|
||||
fprintf(stdout, "%d\t", i);
|
||||
}
|
||||
fprintf(stdout, "| <- Classified as\n");
|
||||
}
|
||||
|
||||
for (int i = 0; i < m->rows; i++) {
|
||||
for (int j = 0; j < m->columns; j++) {
|
||||
fprintf(stdout, "%.0lf\t", m->weight_matrix[i][j]);
|
||||
}
|
||||
if (labeled) {
|
||||
fprintf(stdout, "| %d - %s", i, labels[i]);
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user