/*
 * Author: James Griffin-Allwood
 * Date: March 13 2014
 * Description: Implementation of the learning system and model
 */

/* Standard headers this file relies on: stdio for file and console I/O,
 * stdlib for calloc/free and arc4random_uniform, math for exp, and
 * dispatch/dispatch.h for the Grand Central Dispatch call in classify_dataset. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <dispatch/dispatch.h>

#include "process.h"
#include "nn.h"

int free_matrix(matrix * to_free) {
    if (to_free != NULL) {
        if (to_free->weight_matrix != NULL) {
            for (int i = 0; i < to_free->rows; i++) {
                free(to_free->weight_matrix[i]);
            }
            free(to_free->weight_matrix);
        }
        free(to_free);
    }

    return 0;
}

int free_model(nn_model * to_free) {
    if (to_free != NULL) {
        if (to_free->nodes_per_layer != NULL) {
            free(to_free->nodes_per_layer);
        }

        if (to_free->layer_weights != NULL) {
            for (int i = 0; i < to_free->layers; i++) {
                free_matrix(to_free->layer_weights[i]);
            }
            // Also release the array of matrix pointers
            free(to_free->layer_weights);
        }

        if (to_free->previous_weight_updates != NULL) {
            for (int i = 0; i < to_free->layers; i++) {
                free_matrix(to_free->previous_weight_updates[i]);
            }
            free(to_free->previous_weight_updates);
        }

        // Free any cached layer input/output vectors, then the arrays that held them
        reset_model_vectors(to_free);
        free(to_free->layer_input_vectors);
        free(to_free->layer_output_vectors);

        free(to_free);
    }

    return 0;
}

matrix * create_matrix(int r, int c) {
    matrix * new;

    if ((new = calloc(1, sizeof(matrix))) == NULL) {
        fprintf(stderr, "Unable to allocate memory for matrix\n");
        return NULL;
    }

    if ((new->weight_matrix = calloc(r, sizeof(double *))) == NULL) {
        fprintf(stderr, "Unable to allocate memory for matrix\n");
        return NULL;
    }

    for (int i = 0; i < r; i++) {
        if ((new->weight_matrix[i] = calloc(c, sizeof(double))) == NULL) {
            fprintf(stderr, "Unable to allocate memory for matrix\n");
            return NULL;
        }
    }

    new->rows = r;
    new->columns = c;

    return new;
}

struct nn_model * create_model(const double rate, const int layers, const int layer_nodes[], const int outputs) {
    nn_model * new_model;
    matrix ** layer_weights;
    matrix ** layer_inputs;
    matrix ** layer_outputs;

    if ((new_model = calloc(1, sizeof(nn_model))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    new_model->previous_weight_updates = NULL;
    new_model->momentum = DEFAULT_MOMENTUM;
    new_model->learning_rate = rate;
    new_model->layers = layers;
    new_model->outputs = outputs;

    if ((new_model->nodes_per_layer = calloc(layers, sizeof(int))) == NULL) {
        fprintf(stderr, "Unable to create model\n");
        return NULL;
    }

    for (int i = 0; i < new_model->layers; i++) {
        new_model->nodes_per_layer[i] = layer_nodes[i];
    }

    if ((layer_weights = calloc(layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    if ((layer_inputs = calloc(layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    if ((layer_outputs = calloc(layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    // Initialize the input and output vector arrays to NULL
    for (int i = 0; i < layers; i++) {
        layer_inputs[i] = NULL;
        layer_outputs[i] = NULL;
    }

    // Create the connection weight matrices between the layers (except last hidden layer and outputs)
    for (int i = 0; i < (layers - 1); i++) {
        if ((layer_weights[i] = create_matrix(layer_nodes[i], layer_nodes[i + 1])) == NULL) {
            fprintf(stderr, "Not enough memory for model\n");
            return NULL;
        }
    }

    // Create the connection weight matrix between the last hidden layer and the output
    if ((layer_weights[layers - 1] = create_matrix(layer_nodes[layers - 1], outputs)) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    // Initialize all weights in the network to random values in [-0.5, 0.5)
    for (int i = 0; i < new_model->layers; i++) {
        if (i == (new_model->layers - 1)) {
            for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
                for (int k = 0; k < new_model->outputs; k++) {
                    layer_weights[i]->weight_matrix[j][k] = (arc4random_uniform(100) / 100.0) - 0.5;
                }
            }
        } else {
            for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
                for (int k = 0; k < new_model->nodes_per_layer[i + 1]; k++) {
                    layer_weights[i]->weight_matrix[j][k] = (arc4random_uniform(100) / 100.0) - 0.5;
                }
            }
        }
    }

    new_model->layer_weights = layer_weights;
    new_model->layer_input_vectors = layer_inputs;
    new_model->layer_output_vectors = layer_outputs;
    new_model->output = NULL;
    new_model->previous_weight_updates = NULL;

    return new_model;
}
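/*
 * Illustrative sketch only (not part of the original program): how a model is
 * typically created and released with the functions above.  The guard macro
 * NN_EXAMPLES, the learning rate, and the layer sizes are arbitrary
 * placeholders, not values taken from this project.
 */
#ifdef NN_EXAMPLES
static void example_create_and_free(void)
{
    // Three layers: an input layer of 10 nodes and hidden layers of 6 and 4 nodes.
    int layer_nodes[] = { 10, 6, 4 };
    nn_model * m = create_model(0.05, 3, layer_nodes, PRO_CON_OUTPUT);

    if (m != NULL) {
        // ... train or classify with the model ...
        free_model(m);
    }
}
#endif /* NN_EXAMPLES */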
struct nn_model * copy_model(const struct nn_model * model) {
    nn_model * new_model;

    if ((new_model = calloc(1, sizeof(nn_model))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    new_model->momentum = model->momentum;
    new_model->learning_rate = model->learning_rate;
    new_model->layers = model->layers;
    new_model->outputs = model->outputs;
    new_model->previous_weight_updates = NULL;

    if ((new_model->nodes_per_layer = calloc(new_model->layers, sizeof(int))) == NULL) {
        fprintf(stderr, "Unable to copy model\n");
        return NULL;
    }

    for (int i = 0; i < new_model->layers; i++) {
        new_model->nodes_per_layer[i] = model->nodes_per_layer[i];
    }

    if ((new_model->layer_weights = calloc(new_model->layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    if ((new_model->layer_input_vectors = calloc(new_model->layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    if ((new_model->layer_output_vectors = calloc(new_model->layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    // Initialize the input and output vector arrays to NULL
    for (int i = 0; i < new_model->layers; i++) {
        new_model->layer_input_vectors[i] = NULL;
        new_model->layer_output_vectors[i] = NULL;
    }

    if ((new_model->output = calloc(1, sizeof(matrix))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    // Create the connection weight matrices between the layers (except last hidden layer and outputs)
    for (int i = 0; i < (new_model->layers - 1); i++) {
        if ((new_model->layer_weights[i] = create_matrix(model->layer_weights[i]->rows, model->layer_weights[i]->columns)) == NULL) {
            fprintf(stderr, "Not enough memory for model\n");
            return NULL;
        }
    }

    // Create the connection weight matrix between the last hidden layer and the output
    if ((new_model->layer_weights[new_model->layers - 1] = create_matrix(model->nodes_per_layer[new_model->layers - 1], new_model->outputs)) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    // Copy all the network weights
    for (int i = 0; i < new_model->layers; i++) {
        if (i == (new_model->layers - 1)) {
            for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
                for (int k = 0; k < new_model->outputs; k++) {
                    new_model->layer_weights[i]->weight_matrix[j][k] = model->layer_weights[i]->weight_matrix[j][k];
                }
            }
        } else {
            for (int j = 0; j < new_model->nodes_per_layer[i]; j++) {
                for (int k = 0; k < new_model->nodes_per_layer[i + 1]; k++) {
                    new_model->layer_weights[i]->weight_matrix[j][k] = model->layer_weights[i]->weight_matrix[j][k];
                }
            }
        }
    }

    return new_model;
}

int save_model(const nn_model * m, const char * file) {
    FILE * out;

    if ((out = fopen(file, "w")) == NULL) {
        return 1;
    }

    fprintf(out, "%lf\t", m->learning_rate);
    fprintf(out, "%lf\t", m->momentum);
    fprintf(out, "%d\t", m->layers);
    fprintf(out, "%d\t\n", m->outputs);

    for (int i = 0; i < m->layers; i++) {
        fprintf(out, "%d\t", m->nodes_per_layer[i]);
    }

    for (int i = 0; i < m->layers; i++) {
        for (int j = 0; j < m->layer_weights[i]->rows; j++) {
            for (int k = 0; k < m->layer_weights[i]->columns; k++) {
                fprintf(out, "%lf\t", m->layer_weights[i]->weight_matrix[j][k]);
            }
            fprintf(out, "\n");
        }
    }

    if (fclose(out) == EOF) {
        return 1;
    }

    return 0;
}
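/*
 * Illustrative sketch only: round-tripping a model through save_model() and
 * load_model() (both declared in nn.h).  The guard macro NN_EXAMPLES and the
 * file name are arbitrary placeholders.
 */
#ifdef NN_EXAMPLES
static void example_save_and_reload(const nn_model * m)
{
    char path[] = "model.tsv";

    if (save_model(m, path) == 0) {
        nn_model * reloaded = load_model(path);

        if (reloaded != NULL) {
            free_model(reloaded);
        }
    }
}
#endif /* NN_EXAMPLES */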
struct nn_model * load_model(char * file) {
    FILE * temp;
    nn_model * loaded;

    if ((temp = fopen(file, "r")) == NULL) {
        fprintf(stderr, "Unable to load model from %s\n", file);
        return NULL;
    }

    if ((loaded = calloc(1, sizeof(nn_model))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    fscanf(temp, "%lf\t", &loaded->learning_rate);
    fscanf(temp, "%lf\t", &loaded->momentum);
    fscanf(temp, "%d\t", &loaded->layers);
    fscanf(temp, "%d\t\n", &loaded->outputs);

    if ((loaded->nodes_per_layer = calloc(loaded->layers, sizeof(int))) == NULL) {
        fprintf(stderr, "Unable to load model\n");
        return NULL;
    }

    for (int i = 0; i < loaded->layers; i++) {
        fscanf(temp, "%d\t", &loaded->nodes_per_layer[i]);
    }

    if ((loaded->layer_weights = calloc(loaded->layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    for (int i = 0; i < loaded->layers; i++) {
        int columns = 0;
        int rows = loaded->nodes_per_layer[i];

        // The last weight matrix connects the final hidden layer to the outputs
        if (i != (loaded->layers - 1)) {
            columns = loaded->nodes_per_layer[i + 1];
        } else {
            columns = loaded->outputs;
        }

        if ((loaded->layer_weights[i] = create_matrix(rows, columns)) == NULL) {
            fprintf(stderr, "Not enough memory for model\n");
            return NULL;
        }

        for (int j = 0; j < loaded->layer_weights[i]->rows; j++) {
            for (int k = 0; k < loaded->layer_weights[i]->columns; k++) {
                fscanf(temp, "%lf\t", &loaded->layer_weights[i]->weight_matrix[j][k]);
            }
        }
    }

    if ((loaded->layer_input_vectors = calloc(loaded->layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    if ((loaded->layer_output_vectors = calloc(loaded->layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Not enough memory for model\n");
        return NULL;
    }

    // Initialize the input and output vector arrays to NULL
    for (int i = 0; i < loaded->layers; i++) {
        loaded->layer_input_vectors[i] = NULL;
        loaded->layer_output_vectors[i] = NULL;
    }

    loaded->output = NULL;
    loaded->previous_weight_updates = NULL;

    fclose(temp);

    return loaded;
}

matrix * multiply_matricies(const matrix * a, const matrix * b) {
    matrix * result;

    if (a->columns != b->rows) {
        fprintf(stderr, "Unable to multiply these matrices\n");
        return NULL;
    }

    if ((result = create_matrix(a->rows, b->columns)) == NULL) {
        fprintf(stderr, "Unable to allocate memory for matrix result\n");
        return NULL;
    }

    for (int i = 0; i < result->rows; i++) {
        for (int j = 0; j < result->columns; j++) {
            double value = 0.0;
            for (int k = 0; k < a->columns; k++) {
                value += a->weight_matrix[i][k] * b->weight_matrix[k][j];
            }
            result->weight_matrix[i][j] = value;
        }
    }

    return result;
}

int add_matricies(matrix * a, const matrix * b) {
    if (a->rows != b->rows || a->columns != b->columns) {
        fprintf(stderr, "Unable to add matrices\n");
        return 1;
    }

    for (int i = 0; i < a->rows; i++) {
        for (int j = 0; j < a->columns; j++) {
            a->weight_matrix[i][j] = a->weight_matrix[i][j] + b->weight_matrix[i][j];
        }
    }

    return 0;
}

matrix * activation_function(const matrix * input_vector) {
    matrix * output_vector;

    if (input_vector->rows != 1) {
        fprintf(stderr, "Input vectors for a layer must be 1xn\n");
        return NULL;
    }

    if ((output_vector = create_matrix(input_vector->rows, input_vector->columns)) == NULL) {
        fprintf(stderr, "Unable to run the activation function\n");
        return NULL;
    }

    // Logistic (sigmoid) activation applied element-wise
    for (int i = 0; i < input_vector->columns; i++) {
        double sigmoid = 1 / (1 + exp(-1 * input_vector->weight_matrix[0][i]));
        output_vector->weight_matrix[0][i] = sigmoid;
    }

    return output_vector;
}
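/*
 * Illustrative sketch only: the row-vector convention used by the layer math
 * above.  A 1x2 layer output is multiplied into a 2x3 weight matrix, and the
 * resulting net input is squashed element-wise by activation_function().
 * NN_EXAMPLES is an arbitrary guard macro and all values are placeholders.
 */
#ifdef NN_EXAMPLES
static void example_layer_math(void)
{
    matrix * in = create_matrix(1, 2);
    matrix * weights = create_matrix(2, 3);

    if (in == NULL || weights == NULL) {
        free_matrix(in);
        free_matrix(weights);
        return;
    }

    in->weight_matrix[0][0] = 0.25;
    in->weight_matrix[0][1] = 1.0;          // e.g. the bias input
    weights->weight_matrix[0][0] = 0.1;
    weights->weight_matrix[1][2] = -0.3;

    matrix * net = multiply_matricies(in, weights);   // 1x3 net input
    if (net != NULL) {
        matrix * out = activation_function(net);      // 1x3 activations in (0, 1)
        free_matrix(out);
    }

    free_matrix(net);
    free_matrix(weights);
    free_matrix(in);
}
#endif /* NN_EXAMPLES */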
int classify_instance(nn_model * current, message * input, const int size) {
    int layers = current->layers;
    matrix * input_vector;
    matrix * output_vector;
    double bias = 1.0;

    reset_model_vectors(current);

    if ((input_vector = create_matrix(1, size)) == NULL) {
        fprintf(stderr, "Unable to classify the instance\n");
        return 1;
    }

    for (int i = 0; i < input_vector->columns; i++) {
        input_vector->weight_matrix[0][i] = input->text_vector[i];
    }

    current->layer_output_vectors[0] = input_vector;

    for (int i = 0; i < layers; i++) {
        // Add the bias input by overwriting the last element of the layer's output vector
        current->layer_output_vectors[i]->weight_matrix[0][current->layer_output_vectors[i]->columns - 1] = bias;

        current->layer_input_vectors[i] = multiply_matricies(current->layer_output_vectors[i], current->layer_weights[i]);
        if (current->layer_input_vectors[i] == NULL) {
            fprintf(stderr, "Unable to classify\n");
            return 1;
        }

        if (i != (layers - 1)) {
            current->layer_output_vectors[i + 1] = activation_function(current->layer_input_vectors[i]);
            if (current->layer_output_vectors[i + 1] == NULL) {
                fprintf(stderr, "Unable to classify\n");
                return 1;
            }
        }
    }

    output_vector = activation_function(current->layer_input_vectors[layers - 1]);
    current->output = output_vector;
    if (output_vector == NULL) {
        fprintf(stderr, "Unable to classify\n");
        return 1;
    }

    for (int i = 0; i < PRO_CON_OUTPUT; i++) {
        input->prediction_probability[i] = current->output->weight_matrix[0][i];
    }

    if (input->prediction_probability[0] > input->prediction_probability[1]) {
        input->prediction = CON;
    } else {
        input->prediction = PRO;
    }

    return 0;
}

int reset_model_vectors(nn_model * m) {
    free_matrix(m->output);
    m->output = NULL;

    for (int i = 0; i < m->layers; i++) {
        if (m->layer_input_vectors[i] != NULL) {
            free_matrix(m->layer_input_vectors[i]);
            m->layer_input_vectors[i] = NULL;
        }
        if (m->layer_output_vectors[i] != NULL) {
            free_matrix(m->layer_output_vectors[i]);
            m->layer_output_vectors[i] = NULL;
        }
    }

    return 0;
}

int backprop_update(nn_model * update, matrix * output) {
    matrix * output_error;
    matrix ** hidden_error;
    matrix ** weight_updates;
    int use_momentum = 0;
    int hidden_layers = update->layers - 1;

    if ((output_error = create_matrix(1, PRO_CON_OUTPUT)) == NULL) {
        fprintf(stderr, "Unable to allocate memory for error term\n");
        return 1;
    }

    // Allocate an error matrix for every hidden layer (the output error is handled separately)
    if ((hidden_error = calloc(hidden_layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Unable to allocate memory for hidden errors\n");
        return 1;
    }

    // Allocate enough memory for all the weight updates
    if ((weight_updates = calloc(update->layers, sizeof(matrix *))) == NULL) {
        fprintf(stderr, "Unable to allocate memory for error updates\n");
        return 1;
    }

    for (int i = 0; i < update->layers; i++) {
        weight_updates[i] = create_matrix(update->layer_weights[i]->rows, update->layer_weights[i]->columns);
        if (weight_updates[i] == NULL) {
            fprintf(stderr, "Unable to store weight updates\n");
            return 1;
        }
    }

    // Compute the output errors
    for (int i = 0; i < PRO_CON_OUTPUT; i++) {
        output_error->weight_matrix[0][i] = (update->output->weight_matrix[0][i]
            * (1 - update->output->weight_matrix[0][i])
            * (output->weight_matrix[0][i] - update->output->weight_matrix[0][i]));
    }

    // Propagate the error back through the hidden layers
    for (int i = hidden_layers; i > 0; i--) {
        int forward_nodes = 0;
        matrix * forward_error;

        if (i == hidden_layers) {
            forward_nodes = update->outputs;
            forward_error = output_error;
        } else {
            forward_nodes = update->nodes_per_layer[i + 1];
            forward_error = hidden_error[i];
        }

        hidden_error[i - 1] = create_matrix(1, update->nodes_per_layer[i]);
        if (hidden_error[i - 1] == NULL) {
            fprintf(stderr, "Unable to allocate memory for hidden layer errors\n");
            return 1;
        }

        for (int j = 0; j < update->nodes_per_layer[i]; j++) {
            double error_sum = 0.0;
            for (int k = 0; k < forward_nodes; k++) {
                error_sum += update->layer_weights[i]->weight_matrix[j][k] * forward_error->weight_matrix[0][k];
            }
            hidden_error[i - 1]->weight_matrix[0][j] = (update->layer_output_vectors[i]->weight_matrix[0][j]
                * (1 - update->layer_output_vectors[i]->weight_matrix[0][j])
                * error_sum);
        }
    }

    if (update->previous_weight_updates != NULL) {
        use_momentum = 1;
    }

    // Compute the weight updates and apply them
    for (int i = update->layers; i > 0; i--) {
        matrix * forward_error;

        if (i == update->layers) {
            forward_error = output_error;
        } else {
            forward_error = hidden_error[i - 1];
        }

        for (int j = 0; j < update->layer_weights[i - 1]->rows; j++) {
            for (int k = 0; k < update->layer_weights[i - 1]->columns; k++) {
                double momentum_term = 0.0;
                if (use_momentum) {
                    momentum_term = update->momentum * update->previous_weight_updates[i - 1]->weight_matrix[j][k];
                }
                weight_updates[i - 1]->weight_matrix[j][k] = (update->learning_rate
                    * forward_error->weight_matrix[0][k]
                    * update->layer_output_vectors[i - 1]->weight_matrix[0][j]) + momentum_term;
                update->layer_weights[i - 1]->weight_matrix[j][k] += weight_updates[i - 1]->weight_matrix[j][k];
            }
        }
    }

    free_matrix(output_error);

    if (hidden_error != NULL) {
        for (int i = 0; i < hidden_layers; i++) {
            free_matrix(hidden_error[i]);
        }
        free(hidden_error);
    }

    if (update->previous_weight_updates != NULL) {
        for (int i = 0; i < update->layers; i++) {
            free_matrix(update->previous_weight_updates[i]);
        }
        // Release the old array of pointers before replacing it
        free(update->previous_weight_updates);
    }

    update->previous_weight_updates = weight_updates;

    return 0;
}
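/*
 * Illustrative sketch only: one supervised update, mirroring the inner loop of
 * train_model() below.  The message is assumed to have been populated by the
 * preprocessing code behind process.h; NN_EXAMPLES is an arbitrary guard macro.
 */
#ifdef NN_EXAMPLES
static void example_training_step(nn_model * m, message * msg)
{
    matrix * expected = create_matrix(1, m->outputs);

    if (expected == NULL) {
        return;
    }

    // Forward pass: fills the layer input/output vectors that backprop needs.
    classify_instance(m, msg, m->nodes_per_layer[0]);

    // One-hot target, matching train_model(): column 0 is CON, column 1 is PRO.
    if (msg->class == PRO) {
        expected->weight_matrix[0][1] = 1;
    } else {
        expected->weight_matrix[0][0] = 1;
    }

    backprop_update(m, expected);
    free_matrix(expected);
}
#endif /* NN_EXAMPLES */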
nn_model * train_model(nn_model * m, data * training_data, data * test_data, double error_rate, int epoch_max) {
    int epochs = 0;
    double test_error_rate = 1;
    double last_error_rate = 1;
    int epochs_error_increase = 0;
    nn_model * best = NULL;

    while (epochs < epoch_max) {
        double correctly_classified_test = 0;
        double correctly_classified_train = 0;
        double current_error_rate = 1;
        double train_error_rate = 1;

        // Stop early if the test error has been rising for too many epochs
        if (epochs_error_increase > EPOCH_MAX_ERROR_INCREASE) {
            break;
        }

        matrix * expected_result = create_matrix(1, m->outputs);

        // Run all of the instances through the network to train
        for (int i = 0; i < training_data->count; i++) {
            classify_instance(m, training_data->instances[i], m->nodes_per_layer[0]);

            if (training_data->instances[i]->class == PRO) {
                expected_result->weight_matrix[0][0] = 0;
                expected_result->weight_matrix[0][1] = 1;
            } else {
                expected_result->weight_matrix[0][0] = 1;
                expected_result->weight_matrix[0][1] = 0;
            }

            backprop_update(m, expected_result);
        }
        free_matrix(expected_result);

        // Compute the training error rate for this epoch
        for (int i = 0; i < training_data->count; i++) {
            if (training_data->instances[i]->class == training_data->instances[i]->prediction) {
                correctly_classified_train++;
            }
        }
        train_error_rate = (1 - (correctly_classified_train / training_data->count));

        // Classify the test set and compute its error rate for this epoch
        classify_dataset(m, test_data);
        for (int i = 0; i < test_data->count; i++) {
            if (test_data->instances[i]->class == test_data->instances[i]->prediction) {
                correctly_classified_test++;
            }
        }
        current_error_rate = (1 - (correctly_classified_test / test_data->count));

        // Check to see if the test error rate is a new minimum
        if (current_error_rate < test_error_rate) {
            test_error_rate = current_error_rate;
            fprintf(stdout, "Epoch %3d: New best test error rate found %lf\n", epochs, test_error_rate);
            free_model(best);
            best = copy_model(m);
            epochs_error_increase = 0;
        } else if (current_error_rate >= last_error_rate) {
            epochs_error_increase++;
        } else {
            epochs_error_increase = 0;
        }

        // Print out error rates for plotting every 5 epochs
        if ((epochs % 5) == 0) {
            fprintf(stdout, "Epoch %3d:\ttrain error:%lf\ttest error:%lf\n", epochs, train_error_rate, current_error_rate);
        }

        last_error_rate = current_error_rate;
        epochs++;
    }

    // Report the error rate before returning.
    fprintf(stdout, "Trained model to a test error rate of %lf\n", test_error_rate);

    free_model(m);

    return best;
}
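/*
 * Illustrative sketch only: driving train_model().  Note that train_model()
 * frees the model it is given and returns the best-performing copy, so the
 * caller should keep only the returned pointer.  The error target and epoch
 * cap are arbitrary placeholders; NN_EXAMPLES is an arbitrary guard macro.
 */
#ifdef NN_EXAMPLES
static nn_model * example_train(nn_model * m, data * train_set, data * test_set)
{
    // Train for at most 200 epochs, stopping early if the test error keeps rising.
    return train_model(m, train_set, test_set, 0.10, 200);
}
#endif /* NN_EXAMPLES */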
int classify_dataset(nn_model * m, data * set) {
    // Use Grand Central Dispatch and blocks to multithread this task for performance
    dispatch_apply(set->count, dispatch_get_global_queue(0, 0), ^ (size_t i) {
        nn_model * classifier = copy_model(m);
        classify_instance(classifier, set->instances[i], classifier->nodes_per_layer[0]);
        free_model(classifier);
    });

    // int pro = 0;
    // int con = 0;
    // for (int i = 0; i < set->count; i++) {
    //     if (set->instances[i]->prediction == PRO) {
    //         pro++;
    //     } else {
    //         con++;
    //     }
    // }
    // fprintf(stdout, "Classified %d Pros and %d Cons\n", pro, con);

    return 0;
}

void print_confusion_matrix(data * set) {
    matrix * confusion_matrix = create_matrix(2, 2);
    char * class_label[] = { "Con", "Pro" };

    for (int i = 0; i < set->count; i++) {
        confusion_matrix->weight_matrix[set->instances[i]->class][set->instances[i]->prediction]++;
    }

    print_matrix(confusion_matrix, class_label);

    double accuracy = ((confusion_matrix->weight_matrix[0][0] + confusion_matrix->weight_matrix[1][1]) / set->count) * 100;
    fprintf(stdout, "The model correctly classified %.2lf%% of the instances\n", accuracy);

    free_matrix(confusion_matrix);
}

void print_matrix(matrix * m, char ** labels) {
    int labeled = 0;

    if (labels != NULL) {
        labeled = 1;
    }

    if (labeled) {
        // Header row labels the columns (the predicted classes)
        for (int i = 0; i < m->columns; i++) {
            fprintf(stdout, "%d\t", i);
        }
        fprintf(stdout, "| <- Classified as\n");
    }

    for (int i = 0; i < m->rows; i++) {
        for (int j = 0; j < m->columns; j++) {
            fprintf(stdout, "%.0lf\t", m->weight_matrix[i][j]);
        }
        if (labeled) {
            fprintf(stdout, "| %d - %s", i, labels[i]);
        }
        fprintf(stdout, "\n");
    }
}
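/*
 * Illustrative end-to-end sketch of how the routines in this file fit together.
 * NN_EXAMPLE_MAIN is an arbitrary guard macro, the layer sizes and file name
 * are placeholders, and load_datasets() is a hypothetical stand-in for whatever
 * loading routine process.h actually provides.
 */
#ifdef NN_EXAMPLE_MAIN
int main(void)
{
    data * train_set = NULL;
    data * test_set = NULL;
    int layer_nodes[] = { 200, 40, 10 };    // placeholder layer sizes
    nn_model * m;

    // Hypothetical helper: fills the two data sets from disk.
    if (load_datasets(&train_set, &test_set) != 0) {
        return 1;
    }

    if ((m = create_model(0.05, 3, layer_nodes, PRO_CON_OUTPUT)) == NULL) {
        return 1;
    }

    // train_model() frees the model it receives and returns the best copy found.
    m = train_model(m, train_set, test_set, 0.10, 200);
    if (m == NULL) {
        return 1;
    }

    classify_dataset(m, test_set);
    print_confusion_matrix(test_set);

    char path[] = "trained_model.tsv";
    save_model(m, path);

    free_model(m);
    return 0;
}
#endif /* NN_EXAMPLE_MAIN */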