classdef NeuralNet2 < handle
% NeuralNet2 Neural Network implementation for the NeuralNetPlayground tool
% This class implements the training (forward and backpropagation) and
% predictions using Artificial Neural Networks (ANN). The primary purpose
% is to assist in the construction of the NeuralNetPlayground framework
% as well as to provide a way to train neural networks using
% core MATLAB functionality only (i.e. no toolbox dependencies).
%
% This class is initialized by specifying the total number of input
% layer neurons, hidden layer neurons for each hidden layer desired
% and the total number of output layer neurons as a single vector.
% With specified training examples and expected outputs, the neural
% network weights are learned with Stochastic Gradient Descent.
%
% For regression, the number of output neurons is usually 1. For
% binary class classification, the number of output neurons is usually 1
% and you perform the appropriate thresholding to decide which class
% an input belongs to. For multi-class classification, the number of
% output neurons is usually the total number of classes in your data.
%
% To do non-linear regression, you usually use a non-linear hidden
% activation function (sigmoid, tanh) with a linear output
% activation function. However, if you want to perform linear regression,
% it may be more stable to specify no hidden layers and just have input
% and output layers only. For classification, you can specify all layers
% to have the same activation function. You also have the option of
% specifying no hidden layers and having a single input and output layer
% which will reduce to logistic regression.
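%
% For example, a minimal logistic-regression-style setup (a sketch
% only; X and Y stand in for your own M x 4 feature matrix and
% M x 1 label vector in {0, 1}) could look like:
%
% net = NeuralNet2([4 1]); % Input and output layers only
% net.ActivationFunction = 'sigmoid'; % Sigmoid output in (0, 1)
% perf = net.train(X, Y, 1000); % Threshold net.sim(X) at 0.5 to classify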
%
% The learned weights can be used to predict new examples given the
% learned weights. The loss function used in this implementation is
% the sum of squared differences or Euclidean loss function.
%
% NeuralNet2 Properties:
% LearningRate - The learning rate for Stochastic Gradient Descent
% Must be strictly positive (i.e. > 0).
% Default value is 0.03.
%
% ActivationFunction - The activation function to be applied to each
% neuron in the hidden and output layer.
% The ones you can choose from are:
% 'linear': Linear
% 'relu': Rectified Linear Unit (i.e. ramp)
% 'tanh': Hyperbolic Tangent
% 'sigmoid': Sigmoidal
% Default is 'tanh'.
%
% OutputActivationFunction - The activation function to be applied
% specifically to the output layer. This will
% change the output layer activation function
% but leave the hidden layer one intact.
% The ones you can choose are the same as
% seen in the ActivationFunction property.
% To set this to use the same activation function
% as the hidden layer, set this to empty ([]).
% Default is [].
%
% RegularizationType - Apply regularization to the training process
% if desired.
% The ones you can choose from are:
% 'L1': L1 regularization
% 'L2': L2 regularization
% 'none': No regularization
% Default is 'none'
% Note: The method used for L1 regularization
% comes from:
%
% Tsuruoka, Y., Tsujii, J., Ananiadou, S.,
% Stochastic Gradient Descent Training for
% L1-regularized Log-linear Models with Cumulative
% Penalty
% http://aclweb.org/anthology/P/P09/P09-1054.pdf
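%
% Roughly, that method tracks the total L1 penalty u that
% each weight could have received so far and the penalty
% q_i that weight i actually received, and after every
% gradient step clips each weight towards zero:
%
% w_i > 0: w_i <- max(0, w_i - (u + q_i))
% w_i < 0: w_i <- min(0, w_i + (u - q_i))
% q_i <- q_i + (w_i after clipping - w_i before clipping)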
%
% RegularizationRate - The rate of regularization to apply (if desired)
% Must be non-negative (i.e. >= 0).
% Default is 0.
%
% BatchSize - Number of training examples selected per iteration
% Choosing 1 example would implement true Stochastic
% Gradient Descent while choosing the total number
% of examples would implement Batch Gradient Descent.
% Choosing any value in between implements mini-batch
% Stochastic Gradient Descent.
% Must be strictly positive (i.e. > 0) and integer.
% Default is 10. If the batch size is larger than
% the total number of examples, the batch size
% reverts to using the total number of examples.
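%
% For example (a sketch; X stands in for your own M x N
% training matrix):
%
% net.BatchSize = 1; % True Stochastic Gradient Descent
% net.BatchSize = 32; % Mini-batch Stochastic Gradient Descent
% net.BatchSize = size(X, 1); % Batch Gradient Descent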
%
% ==========================
%
% Example Use - Binary Classification
% -----------
% X = [0 0; 0 1; 1 0; 1 1]; % Define XOR data
% Y = [-1; 1; 1; -1];
% net = NeuralNet2([2 4 1]); % Create Neural Network object
% % Two input layer neurons, one hidden
% % layer with four neurons and one output
% % layer neuron
% N = 5000; % Do 5000 iterations of Stochastic Gradient Descent
%
% % Customize Neural Network engine
% net.LearningRate = 0.1; % Learning rate is set to 0.1
% net.RegularizationType = 'L2'; % Regularization is L2
% net.RegularizationRate = 0.001; % Regularization rate is 0.001
%
% perf = net.train(X, Y, N); % Train the Neural Network
% Yraw = net.sim(X); % Use trained object on original examples
% Ypred = ones(size(Yraw)); % Perform classification with thresholding
% Ypred(Yraw < 0) = -1;
% plot(1:N, perf); % Plot cost function per iteration
%
% % Display results
% disp('Training Examples and expected labels'); display(X); display(Y);
% disp('Predicted outputs'); display(Ypred);
% disp('Classification accuracy: ')
% disp(100 * sum(Y == Ypred) / size(X, 1));
%
%
% ==========================
%
% Example Use - Multiclass Classification
% -----------
% load fisheriris; % Load the Fisher iris dataset
% numFeatures = size(meas, 2); % Total number of features - should be 4
% [~, ~, IDs] = unique(species); % Convert character labels to unique IDs
% numClasses = max(IDs); % Get total number of possible classes
% M = size(meas, 1); % Number of examples
% Y = full(sparse(1 : M, IDs.', 1, M, numClasses)); % Create an output
% % matrix where each row is an
% % example and each column denotes
% % the class the example belongs to.
% % That class gets assigned 1 while
% % the others get 0. This should be
% % -1 if you are using tanh.
% net = NeuralNet2([numFeatures 4 numClasses]); % Create Neural Network object
% % Four input layer neurons, one hidden
% % layer with four neurons and three output layer
% % neurons
% N = 5000; % Do 5000 iterations of Stochastic Gradient Descent
% % Customize Neural Network engine
% net.LearningRate = 0.1; % Learning rate is set to 0.1
% net.RegularizationType = 'L2'; % Regularization is L2
% net.RegularizationRate = 0.001; % Regularization rate is 0.001
% net.ActivationFunction = 'sigmoid'; % sigmoid hidden activation function
% perf = net.train(meas, Y, N); % Train the Neural Network
% Yraw = net.sim(meas); % Use trained object on original examples
% [~, Ypred] = max(Yraw, [], 2); % Determine which class has the largest
% % response per example
% plot(1:N, perf); % Plot cost function per iteration
% % Display results
% disp('Training Examples and expected labels'); display(meas); display(Y);
% disp('Predicted outputs'); display(Ypred);
% disp('Classification accuracy: ');
% disp(100 * sum(IDs == Ypred) / M);
%
%
% ==========================
%
% Example Use - Regression
% -----------
% [x, y] = meshgrid(-2:0.5:2); % Define some sample two feature values
% z = x .* exp(-y); % Output is the function: x*exp(-y)
% X = [x(:) y(:)]; % Unroll the features so they are each in a single column
% Y = z(:); % Unroll output of function into single column
% net = NeuralNet2([2 10 1]); % Create Neural Network object
% % Two input layer neurons, one hidden
% % layer with 10 neurons and one output layer
% % neuron
% N = 10000; % Do 10000 iterations of Stochastic Gradient Descent
% % Customize Neural Network engine
% net.LearningRate = 0.1; % Learning rate is set to 0.1
% net.RegularizationType = 'L2'; % Regularization is L2
% net.RegularizationRate = 0.001; % Regularization rate is 0.001
% net.ActivationFunction = 'tanh'; % tanh hidden activation function
% net.OutputActivationFunction = 'linear'; % linear output activation function
% perf = net.train(X, Y, N); % Train the Neural Network
% Ypred = net.sim(X); % Use trained object on original examples
% plot(1:N, perf); % Plot cost function per iteration
%
% % Display results
% disp('Absolute difference between predicted and true values');
% disp(abs(Y - Ypred));
%
% ==========================
%
% See also NEURALNETAPP
%
% StackOverflowMATLABchat - http://chat.stackoverflow.com/rooms/81987/matlab-and-octave
% Authors: Raymond Phan - http://stackoverflow.com/users/3250829/rayryeng
% Amro - http://stackoverflow.com/users/97160/amro
properties (Access = public)
LearningRate % The learning rate (positive number)
ActivationFunction % The desired activation function (string)
OutputActivationFunction % The desired output activation function (string)
RegularizationType % The type of regularization (string)
RegularizationRate % The regularization rate (non-negative number)
BatchSize % The size of the batch per iteration (positive integer number)
end
properties (Access = private)
inputSize % Single value denoting how many neurons are in the input layer
hiddenSizes % Vector denoting how many neurons per hidden layer
outputSize % Single value denoting how many neurons are in the output layer
weights % The weights of the neural network per layer
end
methods
function this = NeuralNet2(layerSizes)
% NeuralNet2 Create a Neural Network Instance
% The constructor takes in a vector of layer sizes where the
% first element denotes how many neurons are in the input layer,
% the next N elements denote how many neurons are in each desired
% hidden layer and the last element denotes how many neurons are
% in the output layer. Take note that the amount of neurons
% per layer that you specify does not include the bias units.
% These will be included when training the network. Therefore,
% the expected size of the vector is N + 2 where N is the total
% number of hidden layers for the neural network. The exception
% to this rule is when you specify a vector of two elements. This
% is interpreted as having an input layer, an output layer and no
% hidden layers. This situation arises when you would like to perform
% simple linear or logistic regression.
%
% The following example creates a neural network with 1 input
% neuron (plus a bias) in the input layer, 2 hidden neurons
% (plus a bias) in the first hidden layer, 3 hidden neurons
% (plus a bias) in the second hidden layer and 1 output neuron
% in the output layer
%
% layerSizes = [1 2 3 1];
% net = NeuralNet2(layerSizes);
% default params
this.LearningRate = 0.03;
this.ActivationFunction = 'tanh';
this.OutputActivationFunction = [];
this.RegularizationType = 'none';
this.RegularizationRate = 0;
this.BatchSize = 10;
assert(numel(layerSizes) >= 2, 'Total number of layers should be at least 2');
% network structure (fully-connected feed-forward)
% Obtain input layer neuron size
this.inputSize = layerSizes(1);
% Obtain the hidden layer neuron sizes
if numel(layerSizes) ~= 2
this.hiddenSizes = layerSizes(2:end-1);
else
this.hiddenSizes = 0;
end
% Obtain the output layer neuron size
this.outputSize = layerSizes(end);
% Initialize matrices relating between the ith layer
% and (i+1)th layer
this.weights = cell(1, numel(layerSizes) - 1);
for i = 1 : numel(layerSizes) - 1
this.weights{i} = zeros(layerSizes(i) + 1, layerSizes(i + 1));
end
% Initialize weights
init(this);
end
% Note: configure is a stub that validates training data dimensions;
% the input/output ranges it computes are not yet used
function configure(this, X, Y)
% check correct sizes
[xrows,xcols] = size(X);
[yrows,ycols] = size(Y);
assert(xrows == yrows);
assert(xcols == this.inputSize);
assert(ycols == this.outputSize);
% min/max of inputs/outputs (not currently used)
inMin = min(X);
inMax = max(X);
outMin = min(Y);
outMax = max(Y);
end
function init(this)
% init Initialize the Neural Network Weights
% This method initializes the neural network weights
% for connections using the initialization suggested by
% Kaiming He:
%
% He, K., Zhang, X., Ren, S., & Sun, J. (2015).
% Delving deep into rectifiers: Surpassing human-level performance
% on ImageNet classification. arXiv preprint arXiv:1502.01852.
% https://arxiv.org/abs/1502.01852
%
% Take note that this method is run when you create an instance
% of the object. You would call init if you want to reinitialize
% the neural network and start from the beginning.
%
% Uses:
% net = NeuralNet2([1 2 1]);
% % Other code...
% % ...
% % (Re-)initialize weights
% net.init();
for ii = 1 : numel(this.weights)
num = numel(this.weights{ii});
% Kaiming He et al. initialization strategy
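% Each weight is drawn as 2 * randn / sqrt(fan_in), where the
% fan-in (size(this.weights{ii}, 1)) includes the bias unit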
this.weights{ii}(:) = 2.0 * randn(num, 1) / ...
sqrt(size(this.weights{ii}, 1));
end
end
function perf = train(this, X, Y, numIter)
% train Perform neural network training with Stochastic Gradient Descent (SGD)
% This method performs training on the neural network structure
% that was specified when creating an instance of the class.
% Using training example features and their expected outcomes,
% trained network weights are created to facilitate future
% predictions.
%
% Inputs:
% X - Training example features as a 2D matrix of size M x N
% where M is the total number of examples and N is the
% total number of features. N is expected to match
% the number of input layer neurons.
%
% Y - Training example expected outputs as a 2D matrix of size
% M x P where M is the total number of examples and P is
% the total number of output neurons in the output layer.
%
% numIter - Number of iterations Stochastic Gradient Descent
% should take while training. This is an optional
% parameter and the number of iterations defaults to
% 1 if omitted.
%
% Outputs:
% perf - An array of size 1 x numIter which denotes
% the cost between the predicted outputs and
% expected outputs at each iteration of learning
% the weights
%
% Uses:
% net = NeuralNet2([1 2 1]); % Create NN object
% % Create example data here stored in X and Y
% %...
% %...
% perf = net.train(X, Y); % Perform 1 iteration
% perf2 = net.train(X, Y, 500); % Perform 500 iterations
%
% See also NEURALNET2, SIM
% If the number of iterations is not specified, assume 1
if nargin < 4, numIter = 1; end
% Ensure correct sizes
assert(size(X, 1) == size(Y, 1), ['Total number of examples in ' ...
'the inputs and outputs should match']);
% Ensure the regularization rate and batch size are valid
assert(this.BatchSize >= 1, 'Batch size should be 1 or more');
assert(this.RegularizationRate >= 0, ['Regularization rate ' ...
'should be 0 or larger']);
% Check if we have specified the right regularization type
regType = this.RegularizationType;
assert(any(strcmpi(regType, {'l1', 'l2', 'none'})), ...
['Ensure that you choose one of ''l1'', ''l2'' or '...
'''none'' for the regularization type']);
% Ensure number of iterations is strictly positive
assert(numIter >= 1, 'Number of iterations should be positive');
% Initialize cost function array
perf = zeros(1, numIter);
% Total number of examples
N = size(X, 1);
% Total number of applicable layers
L = numel(this.weights);
% Get batch size
% Remove decimal places in case of improper input
B = floor(this.BatchSize);
% Safely catch if batch size is larger than total number
% of examples
if B > N
B = N;
end
% Cell array to store input and outputs of each neuron
sNeuron = cell(1, L);
% First cell array element is for the initial input layer
xNeuron = cell(1, L + 1);
% Cell array for storing the sensitivities
delta = cell(1, L);
% For L1 regularization
if strcmpi(regType, 'l1')
% This represents the total L1 penalty that each
% weight could have received up to current point
uk = 0;
% Total penalty for each weight that was received up to
% current point
qk = cell(1, L);
for ii = 1 : L
qk{ii} = zeros(size(this.weights{ii}));
end
end
% Get activation function for the hidden layer
fcn = getActivationFunction(this.ActivationFunction);
% Get derivative of activation function for the hidden layer
dfcn = getDerivativeActivationFunction(this.ActivationFunction);
% Do the same for the output layer
if isempty(this.OutputActivationFunction)
fcnOut = fcn;
dfcnOut = dfcn;
else
fcnOut = getActivationFunction(this.OutputActivationFunction);
dfcnOut = getDerivativeActivationFunction(this.OutputActivationFunction);
end
% For each iteration...
for ii = 1 : numIter
% If the batch size is equal to the total number of examples
% don't bother with random selection as this will be a full
% batch gradient descent
if N == B
ind = 1 : N;
else
% Randomly select examples corresponding to the batch size
% if the batch size is not equal to the number of examples
ind = randperm(N);
ind = ind(1 : B);
end
% Select out the training example features and expected outputs
IN = X(ind, :);
OUT = Y(ind, :);
% Initialize input layer
xNeuron{1} = [IN ones(B, 1)];
%%% Perform forward propagation
% Make sure you save the inputs and outputs into each neuron
% at the hidden and output layers
for jj = 1 : L
% Compute inputs into next layer
sNeuron{jj} = xNeuron{jj} * this.weights{jj};
% Compute outputs of this layer
if jj == L
xNeuron{jj + 1} = fcnOut(sNeuron{jj});
else
xNeuron{jj + 1} = [fcn(sNeuron{jj}) ones(B, 1)];
end
end
%%% Perform backpropagation
% Compute sensitivities for output layer
delta{end} = (xNeuron{end} - OUT) .* dfcnOut(sNeuron{end});
% Compute the sensitivities for the rest of the layers
for jj = L - 1 : -1 : 1
delta{jj} = dfcn(sNeuron{jj}) .* ...
(delta{jj + 1}*(this.weights{jj + 1}(1 : end - 1, :)).');
end
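% Equivalently, for the per-example error E = 0.5 * ||x_out - y||^2,
% the two steps above compute delta_L = (x_out - y) .* f'(s_L) and
% delta_j = f'(s_j) .* (delta_{j+1} * W_{j+1}' with bias rows removed);
% the 1/B averaging is applied in the weight updates below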
%%% Compute weight updates
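% Each weight matrix is moved against the averaged gradient:
% W_j <- W_j - alpha * ((1 / B) * X_j' * delta_j + (lambda / B) * W_j)
% where the (lambda / B) * W_j term is only added for L2
% regularization and skips the bias row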
alpha = this.LearningRate;
lambda = this.RegularizationRate;
for jj = 1 : L
% Obtain the outputs and sensitivities for each
% affected layer
XX = xNeuron{jj};
D = delta{jj};
% Calculate batch weight update
weight_update = (1 / B) * (XX.') * D;
% Apply L2 regularization if required
if strcmpi(regType, 'l2')
weight_update(1 : end-1, :) = weight_update(1 : end - 1, :) + ...
(lambda/B)*this.weights{jj}(1 : end - 1, :);
end
% Compute the final update
this.weights{jj} = this.weights{jj} - alpha * weight_update;
end
% Apply L1 regularization if required
if strcmpi(regType, 'l1')
% Step #1 - Accumulate total L1 penalty that each
% weight could have received up to this point
uk = uk + (alpha * lambda / B);
% Step #2
% Using the updated weights, now apply the penalties
for jj = 1 : L
% 2a - Save previous weights and penalties
% Make sure to remove bias terms
z = this.weights{jj}(1 : end - 1,:);
q = qk{jj}(1 : end - 1,:);
% 2b - Using the previous weights, find the weights
% that are positive and negative
w = z;
indwp = w > 0;
indwn = w < 0;
% 2c - Perform the update on each condition
% individually
w(indwp) = max(0, w(indwp) - (uk + q(indwp)));
w(indwn) = min(0, w(indwn) + (uk - q(indwn)));
% 2d - Update the actual penalties
qk{jj}(1:end-1,:) = q + (w - z);
% Don't forget to update the actual weights!
this.weights{jj}(1 : end - 1, :) = w;
end
end
% Compute cost at this iteration
perf(ii) = (0.5 / B)*sum(sum((xNeuron{end} - OUT).^2));
% Add in regularization if necessary
if strcmpi(regType, 'l1')
for jj = 1 : L
perf(ii) = perf(ii) + ...
(lambda / B) * sum(sum(abs(this.weights{jj}(1 : end - 1, :))));
end
elseif strcmpi(regType, 'l2')
for jj = 1 : L
perf(ii) = perf(ii) + ...
(0.5 * lambda / B) * sum(sum((this.weights{jj}(1 : end - 1, :)).^2));
end
end
end
end
function [OUT, OUTS] = sim(this, X)
% sim Perform Neural Network Predictions
% This method performs forward propagation using the
% learned weights after training. Forward propagation
% uses the learned weights to propagate information
% throughout the neural network and the predicted outcome
% is seen at the output layer.
%
% Inputs:
% X - Training examples to predict their outcomes
% This is an M x N matrix where M is the total number of
% examples and N is the total number of features.
% N must be equal to the total number of neurons in the input
% layer.
%
% Outputs:
% OUT - The predicted outputs using the training examples in X.
% This is an M x P matrix where M is the total number of
% training examples and P is the total number of output
% neurons
%
% OUTS - This is a 1D cell array of size 1 x NN where NN
% is the total number of layers in the neural network
% including the input and output layers. Therefore, if
% K is equal to the total number of hidden layers,
% NN = K+2. This cell array contains the outputs of
% each example per layer. Specifically, each element
% OUTS{ii} would be a M x Q matrix where Q would be the
% total number of neurons in layer ii without the bias
% unit. ii=1 is the input layer and ii=NN is the output
% layer. Remember if you specify a network with no hidden
% layers, this would mean that K=0 and so NN = 2.
% OUTS{ii} would contain all of the outputs for each
% example in layer ii.
%
% Uses:
% net = NeuralNet2([1 2 1]); % Create NN object
% % Create your training data and train your neural network
% % here...
% % Also create test data here stored in XX...
% % ...
% OUT = net.sim(XX); % Find predictions
% [OUT,OUTS] = net.sim(XX); % Find predictions and outputs
% % per layer
%
% See also NEURALNET2, TRAIN
% Check if the total number of features matches the
% total number of input neurons
assert(size(X, 2) == this.inputSize, ['Number of features '...
'should match the number of input neurons']);
% Get total number of examples
N = size(X, 1);
%%% Begin algorithm
% Start with input layer
OUT = X;
% Also initialize cell array with input layer's contents
OUTS = cell(1, numel(this.weights) + 1);
OUTS{1} = OUT;
% Get activation function
fcn = getActivationFunction(this.ActivationFunction);
if isempty(this.OutputActivationFunction)
fcnOut = fcn;
else
fcnOut = getActivationFunction(this.OutputActivationFunction);
end
% For each layer...
for ii = 1 : numel(this.weights)
% Compute inputs into each neuron and corresponding
% outputs
if ii == numel(this.weights)
OUT = fcnOut([OUT ones(N, 1)] * this.weights{ii});
else
OUT = fcn([OUT ones(N, 1)] * this.weights{ii});
end
OUTS{ii + 1} = OUT;
end
end
end
end
function fcn = getActivationFunction(activation)
switch lower(activation)
case 'linear'
fcn = @f_linear;
case 'relu'
fcn = @f_relu;
case 'tanh'
fcn = @f_tanh;
case 'sigmoid'
fcn = @f_sigmoid;
otherwise
error('Unknown activation function');
end
end
function fcn = getDerivativeActivationFunction(activation)
switch lower(activation)
case 'linear'
fcn = @fd_linear;
case 'relu'
fcn = @fd_relu;
case 'tanh'
fcn = @fd_tanh;
case 'sigmoid'
fcn = @fd_sigmoid;
otherwise
error('Unknown activation function');
end
end
% activation functions and their derivatives
function y = f_linear(x)
% See also: purelin
y = x;
end
function y = fd_linear(x)
% See also: dpurelin
y = ones(size(x));
end
function y = f_relu(x)
% See also: poslin
y = max(x, 0);
end
function y = fd_relu(x)
% See also: dposlin
y = double(x >= 0);
end
function y = f_tanh(x)
% See also: tansig
%y = 2 ./ (1 + exp(-2*x)) - 1;
y = tanh(x);
end
function y = fd_tanh(x)
% See also: dtansig
y = f_tanh(x);
y = 1 - y.^2;
end
function y = f_sigmoid(x)
% See also: logsig
y = 1 ./ (1 + exp(-x));
end
function y = fd_sigmoid(x)
% See also: dlogsig
y = f_sigmoid(x);
y = y .* (1 - y);
end