-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneural_network.c
137 lines (110 loc) · 3.97 KB
/
neural_network.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "include/mnist_file.h"
#include "include/neural_network.h"
// Convert a pixel value from 0-255 to one from 0 to 1
#define PIXEL_SCALE(x) (((float) (x)) / 255.0f)
// Returns a random value between 0 and 1
#define RAND_FLOAT() (((float) rand()) / ((float) RAND_MAX))
/**
* Initialise the weights and bias vectors with values between 0 and 1
*/
void neural_network_random_weights(neural_network_t * network)
{
int i, j;
for (i = 0; i < MNIST_LABELS; i++) {
network->b[i] = RAND_FLOAT();
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
network->W[i][j] = RAND_FLOAT();
}
}
}
/**
* Calculate the softmax vector from the activations. This uses a more
* numerically stable algorithm that normalises the activations to prevent
* large exponents.
*/
void neural_network_softmax(float * activations, int length)
{
int i;
float sum, max;
for (i = 1, max = activations[0]; i < length; i++) {
if (activations[i] > max) {
max = activations[i];
}
}
for (i = 0, sum = 0; i < length; i++) {
activations[i] = exp(activations[i] - max);
sum += activations[i];
}
for (i = 0; i < length; i++) {
activations[i] /= sum;
}
}
/**
* Use the weights and bias vector to forward propogate through the neural
* network and calculate the activations.
*/
void neural_network_hypothesis(mnist_image_t * image, neural_network_t * network, float activations[MNIST_LABELS])
{
int i, j;
for (i = 0; i < MNIST_LABELS; i++) {
activations[i] = network->b[i];
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
activations[i] += network->W[i][j] * PIXEL_SCALE(image->pixels[j]);
}
}
neural_network_softmax(activations, MNIST_LABELS);
}
/**
* Update the gradients for this step of gradient descent using the gradient
* contributions from a single training example (image).
*
* This function returns the loss ontribution from this training example.
*/
float neural_network_gradient_update(mnist_image_t * image, neural_network_t * network, neural_network_gradient_t * gradient, uint8_t label)
{
float activations[MNIST_LABELS];
float b_grad, W_grad;
int i, j;
// First forward propagate through the network to calculate activations
neural_network_hypothesis(image, network, activations);
for (i = 0; i < MNIST_LABELS; i++) {
// This is the gradient for a softmax bias input
b_grad = (i == label) ? activations[i] - 1 : activations[i];
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
// The gradient for the neuron weight is the bias multiplied by the input weight
W_grad = b_grad * PIXEL_SCALE(image->pixels[j]);
// Update the weight gradient
gradient->W_grad[i][j] += W_grad;
}
// Update the bias gradient
gradient->b_grad[i] += b_grad;
}
// Cross entropy loss
return 0.0f - log(activations[label]);
}
/**
* Run one step of gradient descent and update the neural network.
*/
float neural_network_training_step(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate)
{
neural_network_gradient_t gradient;
float total_loss;
int i, j;
// Zero initialise gradient for weights and bias vector
memset(&gradient, 0, sizeof(neural_network_gradient_t));
// Calculate the gradient and the loss by looping through the training set
for (i = 0, total_loss = 0; i < dataset->size; i++) {
total_loss += neural_network_gradient_update(&dataset->images[i], network, &gradient, dataset->labels[i]);
}
// Apply gradient descent to the network
for (i = 0; i < MNIST_LABELS; i++) {
network->b[i] -= learning_rate * gradient.b_grad[i] / ((float) dataset->size);
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
network->W[i][j] -= learning_rate * gradient.W_grad[i][j] / ((float) dataset->size);
}
}
return total_loss;
}