initial github commit
Implementation of backprop in C using Grand Central Dispatch and Blocks
This commit is contained in:
121
process.h
Normal file
121
process.h
Normal file
@@ -0,0 +1,121 @@
|
||||
/* Include guard for process.h. */
#ifndef PROCESS
|
||||
#define PROCESS
|
||||
|
||||
/* Classification labels for a message (stored in the class field of struct message). */
#define PRO 1
|
||||
#define CON 0
|
||||
/* Length of the prediction_probability array in struct message. */
#define PRO_CON_OUTPUT 2
|
||||
/* NOTE(review): presumably the class value for a message with no supplied label;
   it has the same value (2) as PRO_CON_OUTPUT -- confirm against the implementation. */
#define UNKNOWN 2
|
||||
/* Length of text_vector in struct message and of vector_document_counts in struct data. */
#define VECTOR_SIZE 1000
|
||||
/* NOTE(review): not referenced anywhere in this header; presumably the maximum
   length of a single vector term -- confirm against the implementation. */
#define MAX_TERM_LENGTH 128
|
||||
|
||||
/*
A struct that contains a text message and a PRO or CON classification
*/
|
||||
typedef struct message {
|
||||
/* The text of the message. */
char * text;
|
||||
/* NOTE(review): presumably the vector representation of text filled in by
   vector_representation(); fixed length VECTOR_SIZE -- confirm. */
double text_vector[VECTOR_SIZE];
|
||||
/* The PRO or CON classification (NOTE(review): presumably UNKNOWN when no
   label was provided -- confirm). */
int class;
|
||||
/* NOTE(review): presumably the predicted classification -- confirm. */
int prediction;
|
||||
/* NOTE(review): presumably one probability per output class; fixed length
   PRO_CON_OUTPUT -- confirm. */
double prediction_probability[PRO_CON_OUTPUT];
|
||||
} message;
|
||||
|
||||
/*
A struct that contains all of the messages used for training or testing
*/
|
||||
typedef struct data {
|
||||
/* Array of pointers to the message structs in this data set. */
message ** instances;
|
||||
/* The array of terms that make up the vector representation. */
char ** vector_terms;
|
||||
/* NOTE(review): presumably the number of documents containing each vector
   term, indexed like vector_terms; fixed length VECTOR_SIZE -- confirm. */
int vector_document_counts[VECTOR_SIZE];
|
||||
/* NOTE(review): presumably the number of entries in instances -- confirm. */
int count;
|
||||
} data;
|
||||
|
||||
/*
A function that will free all the memory allocated for a data struct
@param to_free The data struct that should be freed
@return 0 if it is freed successfully
*/
|
||||
int free_data(data * to_free);
|
||||
|
||||
/*
A function that takes a file name and returns a data struct that contains the
messages (and classifications if provided)
@param file: The name of a file of data to be read into a data structure.
@return A pointer to a struct containing an array of message structs and their
classifications
*/
|
||||
data * read_data(char * file);
|
||||
|
||||
/*
Output data into a weka format
@param print_data: The data struct to be printed
@param out_file: The file where the weka arff should be written
@return 0 if successfully output
*/
|
||||
int weka_output(data * print_data, char * out_file);
|
||||
|
||||
/*
Output data into a csv file with 1 instance per line
@param print_data: The data struct to be printed
@param out_file: The file where the csv should be written
@return 0 if successfully output
*/
|
||||
int csv_output(data * print_data, char * out_file);
|
||||
/*
A function for escaping single quotes in a string.
Based off generic code found at http://creativeandcritical.net/str-replace-c/
Modified to only escape single quotes.
@param str The string to escape
@return An escaped string.
NOTE(review): the result is a non-const char pointer while the input is const,
so it is presumably a newly allocated buffer the caller must free -- confirm.
*/
|
||||
char * escape_single_quote(const char *str);
|
||||
|
||||
/*
A function that reads in a collection of stop words from a file with 1 word per line.
@param filename The name of the file to be parsed
@param word_count A pointer where the number of stop words should be stored
@return the array of words
*/
|
||||
char ** load_stop_words(char * filename, int * word_count);
|
||||
|
||||
/*
A function that takes a dataset and a percentage that should be reserved for testing.
The function requires 2 data pointers for storing the resulting train and test sets
@param dataset The data to be split
@param percent The percent of the data to be used for TESTING
@param train The data that will be used for training
@param test The data that will be used for testing
@return 0 if the new datasets are created without issue. non zero otherwise.
*/
|
||||
int train_test_split(const data * dataset, const int percent, data * train, data * test);
|
||||
|
||||
/*
A function that parses through all the supplied messages in a data struct
and determines the most relevant terms in the training data. These will be
used in the construction of a vector representation of any specific message
@param dataset The collection of messages to be considered
@param stop_words an array of stopwords to ignore
@param stop_word_count the number of stop words.
@param size NOTE(review): not described by the original documentation;
presumably the number of terms in the vector -- confirm.
@return 0 if the vector representation is found and the vectors created for
all messages
NOTE: the identifier is spelled create_vector_represntation (sic); the spelling
is part of the public interface and is kept as is.
*/
|
||||
int create_vector_represntation(data * dataset, char ** stop_words, const int stop_word_count, const int size);
|
||||
|
||||
/*
A simple function that compares word counts based on an array of words
and a second array of the counts.
NOTE(review): the description mentions word counts while the name says strings,
and the only inputs are two untyped pointers; what a and b actually point to
cannot be told from this header -- confirm against the implementation.
@param a word 1
@param b word 2
@return -1, 0, 1 if a is less than, equal to, or greater than b
*/
|
||||
int compare_strings(const void * a, const void * b);
|
||||
|
||||
/*
A function that parses through all the supplied messages in a data struct
and determines the vector representation based on the supplied vector terms
@param dataset The collection of messages to be considered
@param vector_terms The array of terms that make up the vector
@param vector_document_counts NOTE(review): not described by the original
documentation; presumably per-term document counts matching vector_terms -- confirm.
@param size NOTE(review): not described by the original documentation;
presumably the number of entries in vector_terms -- confirm.
@return 0 if the vectors created for all messages
*/
|
||||
int vector_representation(data * dataset, char ** vector_terms, int * vector_document_counts, const int size);
|
||||
|
||||
#endif /* PROCESS */
|
Reference in New Issue
Block a user