From 7247d39bbcc233e0a2f00cc9c717b70da916a64d Mon Sep 17 00:00:00 2001 From: t-sagoy Date: Thu, 30 Apr 2020 12:53:28 +0000 Subject: [PATCH 1/6] FastCell Example Fixes --- .../pytorch/FastCells/fastcell_example.py | 10 ++++-- pytorch/edgeml_pytorch/trainer/fastTrainer.py | 32 ++++++++++++------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/examples/pytorch/FastCells/fastcell_example.py b/examples/pytorch/FastCells/fastcell_example.py index 9d55dd9d7..69a759c7e 100644 --- a/examples/pytorch/FastCells/fastcell_example.py +++ b/examples/pytorch/FastCells/fastcell_example.py @@ -11,7 +11,7 @@ def main(): # change cuda:0 to cuda:gpuid for specific allocation - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu") # Fixing seeds for reproducibility torch.manual_seed(42) np.random.seed(42) @@ -43,10 +43,16 @@ def main(): (dataDimension, numClasses, Xtrain, Ytrain, Xtest, Ytest, mean, std) = helpermethods.preProcessData(dataDir) - assert dataDimension % inputDims == 0, "Infeasible per step input, " + \ "Timesteps have to be integer" + timeSteps = int(Xtest.shape[1] / inputDims) + Xtest = np.reshape(Xtest, (-1, timeSteps, inputDims)) + Xtrain = Xtrain.reshape((-1, timeSteps, inputDims)) + if not args.batch_first: + Xtest = np.swapaxes(Xtest, 0, 1) + Xtrain = np.swapaxes(Xtrain, 0, 1) + currDir = helpermethods.createTimeStampDir(dataDir, cell) helpermethods.dumpCommand(sys.argv, currDir) diff --git a/pytorch/edgeml_pytorch/trainer/fastTrainer.py b/pytorch/edgeml_pytorch/trainer/fastTrainer.py index 3f0ebd338..e1aeb1547 100644 --- a/pytorch/edgeml_pytorch/trainer/fastTrainer.py +++ b/pytorch/edgeml_pytorch/trainer/fastTrainer.py @@ -77,9 +77,13 @@ def computeLogits(self, input): logits = self.classifier(feats[-1, :]) else: feats = self.RNN(input) - logits = self.classifier(feats[-1, :]) - - return logits, feats[:, -1] + if self.batch_first: + logits = self.classifier(feats[:, -1]) + feats_n = feats[:,-1] + else: + logits = self.classifier(feats[-1,:]) + feats_n = feats[-1,:] + return logits, feats_n def optimizer(self): ''' @@ -351,7 +355,13 @@ def train(self, batchSize, totalEpochs, Xtrain, Xtest, Ytrain, Ytest, ''' fileName = str(self.FastObj.cellType) + 'Results_pytorch.txt' resultFile = open(os.path.join(dataDir, fileName), 'a+') - numIters = int(np.ceil(float(Xtrain.shape[0]) / float(batchSize))) + if self.batch_first: + self.timeSteps = Xtrain.shape[1] + self.numPoints = Xtrain.shape[0] + else: + self.timeSteps = Xtrain.shape[0] + self.numPoints = Xtrain.shape[1] + numIters = int(np.ceil(float(self.numPoints) / float(batchSize))) totalBatches = numIters * totalEpochs counter = 0 @@ -362,11 +372,6 @@ def train(self, batchSize, totalEpochs, Xtrain, Xtest, Ytrain, Ytest, ihtDone = 1 maxTestAcc = -10000 header = '*' * 20 - self.timeSteps = int(Xtest.shape[1] / self.inputDims) - Xtest = Xtest.reshape((-1, self.timeSteps, self.inputDims)) - Xtest = np.swapaxes(Xtest, 0, 1) - Xtrain = Xtrain.reshape((-1, self.timeSteps, self.inputDims)) - Xtrain = np.swapaxes(Xtrain, 0, 1) for i in range(0, totalEpochs): print("\nEpoch Number: " + str(i), file=self.outFile) @@ -376,7 +381,7 @@ def train(self, batchSize, totalEpochs, Xtrain, Xtest, Ytrain, Ytest, for param_group in self.optimizer.param_groups: param_group['lr'] = self.learningRate - shuffled = list(range(Xtrain.shape[1])) + shuffled = list(range(self.numPoints)) np.random.shuffle(shuffled) trainAcc = 0.0 trainLoss = 0.0 @@ -389,9 +394,12 @@ def 
train(self, batchSize, totalEpochs, Xtrain, Xtest, Ytrain, Ytest, (header, msg, header), file=self.outFile) k = shuffled[j * batchSize:(j + 1) * batchSize] - batchX = Xtrain[:, k, :] + if self.batch_first: + batchX = Xtrain[k, :, :] + else: + batchX = Xtrain[:, k, :] + batchY = Ytrain[k] - self.optimizer.zero_grad() logits, _ = self.computeLogits(batchX.to(self.device)) batchLoss = self.loss(logits, batchY.to(self.device)) From e1958059b2e7de9b3a0489c2123f0ba593447d4e Mon Sep 17 00:00:00 2001 From: SachinG007 Date: Sat, 2 May 2020 23:44:36 +0000 Subject: [PATCH 2/6] LSTM logit fix for batch first --- pytorch/edgeml_pytorch/trainer/fastTrainer.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pytorch/edgeml_pytorch/trainer/fastTrainer.py b/pytorch/edgeml_pytorch/trainer/fastTrainer.py index e1aeb1547..f5d7ad307 100644 --- a/pytorch/edgeml_pytorch/trainer/fastTrainer.py +++ b/pytorch/edgeml_pytorch/trainer/fastTrainer.py @@ -74,16 +74,15 @@ def computeLogits(self, input): ''' if self.FastObj.cellType == "LSTMLR": feats, _ = self.RNN(input) - logits = self.classifier(feats[-1, :]) else: feats = self.RNN(input) - if self.batch_first: + + if self.batch_first: logits = self.classifier(feats[:, -1]) - feats_n = feats[:,-1] - else: - logits = self.classifier(feats[-1,:]) - feats_n = feats[-1,:] - return logits, feats_n + return logits, feats[:, -1] + else: + logits = self.classifier(feats[-1, :]) + return logits, feats[-1, :] def optimizer(self): ''' From 9427308ae2283bb8030d6da2c427cc65a4696d7c Mon Sep 17 00:00:00 2001 From: SachinG007 Date: Sun, 3 May 2020 00:18:55 +0000 Subject: [PATCH 3/6] rnnpool merge --- pytorch/edgeml_pytorch/graph/rnn.py | 201 ++++++++++++++++++++++------ 1 file changed, 157 insertions(+), 44 deletions(-) diff --git a/pytorch/edgeml_pytorch/graph/rnn.py b/pytorch/edgeml_pytorch/graph/rnn.py index 5a292ee00..988f7e495 100644 --- a/pytorch/edgeml_pytorch/graph/rnn.py +++ b/pytorch/edgeml_pytorch/graph/rnn.py @@ -144,8 +144,8 @@ def getVars(self): def get_model_size(self): ''' - Function to get aimed model size - ''' + Function to get aimed model size + ''' mats = self.getVars() endW = self._num_W_matrices endU = endW + self._num_U_matrices @@ -261,7 +261,7 @@ def __init__(self, input_size, hidden_size, gate_nonlinearity="sigmoid", self.zeta = nn.Parameter(self._zetaInit * torch.ones([1, 1])) self.nu = nn.Parameter(self._nuInit * torch.ones([1, 1])) - self.copy_previous_UW() + # self.copy_previous_UW() @property def name(self): @@ -330,7 +330,7 @@ class FastGRNNCUDACell(RNNCell): ''' def __init__(self, input_size, hidden_size, gate_nonlinearity="sigmoid", update_nonlinearity="tanh", wRank=None, uRank=None, zetaInit=1.0, nuInit=-4.0, wSparsity=1.0, uSparsity=1.0, name="FastGRNNCUDACell"): - super(FastGRNNCUDACell, self).__init__(input_size, hidden_size, gate_non_linearity, update_nonlinearity, + super(FastGRNNCUDACell, self).__init__(input_size, hidden_size, gate_nonlinearity, update_nonlinearity, 1, 1, 2, wRank, uRank, wSparsity, uSparsity) if utils.findCUDA() is None: raise Exception('FastGRNNCUDA is supported only on GPU devices.') @@ -967,63 +967,115 @@ class BaseRNN(nn.Module): [batchSize, timeSteps, inputDims] ''' - def __init__(self, cell: RNNCell, batch_first=False): + def __init__(self, cell: RNNCell, batch_first=False, cell_reverse: RNNCell=None, bidirectional=False): super(BaseRNN, self).__init__() - self._RNNCell = cell + self.RNNCell = cell self._batch_first = batch_first + self._bidirectional = bidirectional + if cell_reverse is 
not None: + self.RNNCell_reverse = cell_reverse + elif self._bidirectional: + self.RNNCell_reverse = cell def getVars(self): - return self._RNNCell.getVars() + return self.RNNCell.getVars() def forward(self, input, hiddenState=None, cellState=None): self.device = input.device + self.num_directions = 2 if self._bidirectional else 1 + # hidden + # for i in range(num_directions): hiddenStates = torch.zeros( [input.shape[0], input.shape[1], - self._RNNCell.output_size]).to(self.device) + self.RNNCell.output_size]).to(self.device) + + if self._bidirectional: + hiddenStates_reverse = torch.zeros( + [input.shape[0], input.shape[1], + self.RNNCell_reverse.output_size]).to(self.device) + if hiddenState is None: hiddenState = torch.zeros( - [input.shape[0] if self._batch_first else input.shape[1], - self._RNNCell.output_size]).to(self.device) + [self.num_directions, input.shape[0] if self._batch_first else input.shape[1], + self.RNNCell.output_size]).to(self.device) if self._batch_first is True: - if self._RNNCell.cellType == "LSTMLR": + if self.RNNCell.cellType == "LSTMLR": cellStates = torch.zeros( [input.shape[0], input.shape[1], - self._RNNCell.output_size]).to(self.device) + self.RNNCell.output_size]).to(self.device) + if self._bidirectional: + cellStates_reverse = torch.zeros( + [input.shape[0], input.shape[1], + self.RNNCell_reverse.output_size]).to(self.device) if cellState is None: cellState = torch.zeros( - [input.shape[0], self._RNNCell.output_size]).to(self.device) + [self.num_directions, input.shape[0], self.RNNCell.output_size]).to(self.device) for i in range(0, input.shape[1]): - hiddenState, cellState = self._RNNCell( - input[:, i, :], (hiddenState, cellState)) - hiddenStates[:, i, :] = hiddenState - cellStates[:, i, :] = cellState - return hiddenStates, cellStates + hiddenState[0], cellState[0] = self.RNNCell( + input[:, i, :], (hiddenState[0].clone(), cellState[0].clone())) + hiddenStates[:, i, :] = hiddenState[0] + cellStates[:, i, :] = cellState[0] + if self._bidirectional: + hiddenState[1], cellState[1] = self.RNNCell_reverse( + input[:, input.shape[1]-i-1, :], (hiddenState[1].clone(), cellState[1].clone())) + hiddenStates_reverse[:, i, :] = hiddenState[1] + cellStates_reverse[:, i, :] = cellState[1] + if not self._bidirectional: + return hiddenStates, cellStates + else: + return torch.cat([hiddenStates,hiddenStates_reverse],-1), torch.cat([cellStates,cellStates_reverse],-1) else: for i in range(0, input.shape[1]): - hiddenState = self._RNNCell(input[:, i, :], hiddenState) - hiddenStates[:, i, :] = hiddenState - return hiddenStates + hiddenState[0] = self.RNNCell(input[:, i, :], hiddenState[0].clone()) + hiddenStates[:, i, :] = hiddenState[0] + if self._bidirectional: + hiddenState[1] = self.RNNCell_reverse( + input[:, input.shape[1]-i-1, :], hiddenState[1].clone()) + hiddenStates_reverse[:, i, :] = hiddenState[1] + if not self._bidirectional: + return hiddenStates + else: + return torch.cat([hiddenStates,hiddenStates_reverse],-1) else: - if self._RNNCell.cellType == "LSTMLR": + if self.RNNCell.cellType == "LSTMLR": cellStates = torch.zeros( [input.shape[0], input.shape[1], - self._RNNCell.output_size]).to(self.device) + self.RNNCell.output_size]).to(self.device) + if self._bidirectional: + cellStates_reverse = torch.zeros( + [input.shape[0], input.shape[1], + self.RNNCell_reverse.output_size]).to(self.device) if cellState is None: cellState = torch.zeros( - [input.shape[1], self._RNNCell.output_size]).to(self.device) + [self.num_directions, input.shape[1], 
self.RNNCell.output_size]).to(self.device) for i in range(0, input.shape[0]): - hiddenState, cellState = self._RNNCell( - input[i, :, :], (hiddenState, cellState)) - hiddenStates[i, :, :] = hiddenState - cellStates[i, :, :] = cellState - return hiddenStates, cellStates + hiddenState[0], cellState[0] = self.RNNCell( + input[i, :, :], (hiddenState[0].clone(), cellState[0].clone())) + hiddenStates[i, :, :] = hiddenState[0] + cellStates[i, :, :] = cellState[0] + if self._bidirectional: + hiddenState[1], cellState[1] = self.RNNCell_reverse( + input[input.shape[0]-i-1, :, :], (hiddenState[1].clone(), cellState[1].clone())) + hiddenStates_reverse[i, :, :] = hiddenState[1] + cellStates_reverse[i, :, :] = cellState[1] + if not self._bidirectional: + return hiddenStates, cellStates + else: + return torch.cat([hiddenStates,hiddenStates_reverse],-1), torch.cat([cellStates,cellStates_reverse],-1) else: for i in range(0, input.shape[0]): - hiddenState = self._RNNCell(input[i, :, :], hiddenState) - hiddenStates[i, :, :] = hiddenState - return hiddenStates + hiddenState[0] = self.RNNCell(input[i, :, :], hiddenState[0].clone()) + hiddenStates[i, :, :] = hiddenState[0] + if self._bidirectional: + hiddenState[1] = self.RNNCell_reverse( + input[input.shape[0]-i-1, :, :], hiddenState[1].clone()) + hiddenStates_reverse[i, :, :] = hiddenState[1] + if not self._bidirectional: + return hiddenStates + else: + return torch.cat([hiddenStates,hiddenStates_reverse],-1) class LSTM(nn.Module): @@ -1031,14 +1083,26 @@ class LSTM(nn.Module): def __init__(self, input_size, hidden_size, gate_nonlinearity="sigmoid", update_nonlinearity="tanh", wRank=None, uRank=None, - wSparsity=1.0, uSparsity=1.0, batch_first=False): + wSparsity=1.0, uSparsity=1.0, batch_first=False, + bidirectional=False, is_shared_bidirectional=True): super(LSTM, self).__init__() + self._bidirectional = bidirectional + self._batch_first = batch_first + self._is_shared_bidirectional = is_shared_bidirectional self.cell = LSTMLRCell(input_size, hidden_size, gate_nonlinearity=gate_nonlinearity, update_nonlinearity=update_nonlinearity, wRank=wRank, uRank=uRank, wSparsity=wSparsity, uSparsity=uSparsity) - self.unrollRNN = BaseRNN(self.cell, batch_first=batch_first) + self.unrollRNN = BaseRNN(self.cell, batch_first=self._batch_first, bidirectional=self._bidirectional) + + if self._bidirectional is True and self._is_shared_bidirectional is False: + self.cell_reverse = LSTMLRCell(input_size, hidden_size, + gate_nonlinearity=gate_nonlinearity, + update_nonlinearity=update_nonlinearity, + wRank=wRank, uRank=uRank, + wSparsity=wSparsity, uSparsity=uSparsity) + self.unrollRNN = BaseRNN(self.cell, self.cell_reverse, batch_first=self._batch_first, bidirectional=self._bidirectional) def forward(self, input, hiddenState=None, cellState=None): return self.unrollRNN(input, hiddenState, cellState) @@ -1049,14 +1113,26 @@ class GRU(nn.Module): def __init__(self, input_size, hidden_size, gate_nonlinearity="sigmoid", update_nonlinearity="tanh", wRank=None, uRank=None, - wSparsity=1.0, uSparsity=1.0, batch_first=False): + wSparsity=1.0, uSparsity=1.0, batch_first=False, + bidirectional=False, is_shared_bidirectional=True): super(GRU, self).__init__() + self._bidirectional = bidirectional + self._batch_first = batch_first + self._is_shared_bidirectional = is_shared_bidirectional self.cell = GRULRCell(input_size, hidden_size, gate_nonlinearity=gate_nonlinearity, update_nonlinearity=update_nonlinearity, wRank=wRank, uRank=uRank, wSparsity=wSparsity, uSparsity=uSparsity) - 
self.unrollRNN = BaseRNN(self.cell, batch_first=batch_first) + self.unrollRNN = BaseRNN(self.cell, batch_first=self._batch_first, bidirectional=self._bidirectional) + + if self._bidirectional is True and self._is_shared_bidirectional is False: + self.cell_reverse = GRULRCell(input_size, hidden_size, + gate_nonlinearity=gate_nonlinearity, + update_nonlinearity=update_nonlinearity, + wRank=wRank, uRank=uRank, + wSparsity=wSparsity, uSparsity=uSparsity) + self.unrollRNN = BaseRNN(self.cell, self.cell_reverse, batch_first=self._batch_first, bidirectional=self._bidirectional) def forward(self, input, hiddenState=None, cellState=None): return self.unrollRNN(input, hiddenState, cellState) @@ -1067,14 +1143,26 @@ class UGRNN(nn.Module): def __init__(self, input_size, hidden_size, gate_nonlinearity="sigmoid", update_nonlinearity="tanh", wRank=None, uRank=None, - wSparsity=1.0, uSparsity=1.0, batch_first=False): + wSparsity=1.0, uSparsity=1.0, batch_first=False, + bidirectional=False, is_shared_bidirectional=True): super(UGRNN, self).__init__() + self._bidirectional = bidirectional + self._batch_first = batch_first + self._is_shared_bidirectional = is_shared_bidirectional self.cell = UGRNNLRCell(input_size, hidden_size, gate_nonlinearity=gate_nonlinearity, update_nonlinearity=update_nonlinearity, wRank=wRank, uRank=uRank, wSparsity=wSparsity, uSparsity=uSparsity) - self.unrollRNN = BaseRNN(self.cell, batch_first=batch_first) + self.unrollRNN = BaseRNN(self.cell, batch_first=self._batch_first, bidirectional=self._bidirectional) + + if self._bidirectional is True and self._is_shared_bidirectional is False: + self.cell_reverse = UGRNNLRCell(input_size, hidden_size, + gate_nonlinearity=gate_nonlinearity, + update_nonlinearity=update_nonlinearity, + wRank=wRank, uRank=uRank, + wSparsity=wSparsity, uSparsity=uSparsity) + self.unrollRNN = BaseRNN(self.cell, self.cell_reverse, batch_first=self._batch_first, bidirectional=self._bidirectional) def forward(self, input, hiddenState=None, cellState=None): return self.unrollRNN(input, hiddenState, cellState) @@ -1085,15 +1173,28 @@ class FastRNN(nn.Module): def __init__(self, input_size, hidden_size, gate_nonlinearity="sigmoid", update_nonlinearity="tanh", wRank=None, uRank=None, - wSparsity=1.0, uSparsity=1.0, alphaInit=-3.0, betaInit=3.0, batch_first=False): + wSparsity=1.0, uSparsity=1.0, alphaInit=-3.0, betaInit=3.0, + batch_first=False, bidirectional=False, is_shared_bidirectional=True): super(FastRNN, self).__init__() + self._bidirectional = bidirectional + self._batch_first = batch_first + self._is_shared_bidirectional = is_shared_bidirectional self.cell = FastRNNCell(input_size, hidden_size, gate_nonlinearity=gate_nonlinearity, update_nonlinearity=update_nonlinearity, wRank=wRank, uRank=uRank, wSparsity=wSparsity, uSparsity=uSparsity, alphaInit=alphaInit, betaInit=betaInit) - self.unrollRNN = BaseRNN(self.cell, batch_first=batch_first) + self.unrollRNN = BaseRNN(self.cell, batch_first=self._batch_first, bidirectional=self._bidirectional) + + if self._bidirectional is True and self._is_shared_bidirectional is False: + self.cell_reverse = FastRNNCell(input_size, hidden_size, + gate_nonlinearity=gate_nonlinearity, + update_nonlinearity=update_nonlinearity, + wRank=wRank, uRank=uRank, + wSparsity=wSparsity, uSparsity=uSparsity, + alphaInit=alphaInit, betaInit=betaInit) + self.unrollRNN = BaseRNN(self.cell, self.cell_reverse, batch_first=self._batch_first, bidirectional=self._bidirectional) def forward(self, input, hiddenState=None, cellState=None): return 
self.unrollRNN(input, hiddenState, cellState) @@ -1105,15 +1206,27 @@ class FastGRNN(nn.Module): def __init__(self, input_size, hidden_size, gate_nonlinearity="sigmoid", update_nonlinearity="tanh", wRank=None, uRank=None, wSparsity=1.0, uSparsity=1.0, zetaInit=1.0, nuInit=-4.0, - batch_first=False): + batch_first=False, bidirectional=False, is_shared_bidirectional=True): super(FastGRNN, self).__init__() + self._bidirectional = bidirectional + self._batch_first = batch_first + self._is_shared_bidirectional = is_shared_bidirectional self.cell = FastGRNNCell(input_size, hidden_size, gate_nonlinearity=gate_nonlinearity, update_nonlinearity=update_nonlinearity, wRank=wRank, uRank=uRank, wSparsity=wSparsity, uSparsity=uSparsity, zetaInit=zetaInit, nuInit=nuInit) - self.unrollRNN = BaseRNN(self.cell, batch_first=batch_first) + self.unrollRNN = BaseRNN(self.cell, batch_first=self._batch_first, bidirectional=self._bidirectional) + + if self._bidirectional is True and self._is_shared_bidirectional is False: + self.cell_reverse = FastGRNNCell(input_size, hidden_size, + gate_nonlinearity=gate_nonlinearity, + update_nonlinearity=update_nonlinearity, + wRank=wRank, uRank=uRank, + wSparsity=wSparsity, uSparsity=uSparsity, + zetaInit=zetaInit, nuInit=nuInit) + self.unrollRNN = BaseRNN(self.cell, self.cell_reverse, batch_first=self._batch_first, bidirectional=self._bidirectional) def getVars(self): return self.unrollRNN.getVars() @@ -1222,8 +1335,8 @@ def getVars(self): def get_model_size(self): ''' - Function to get aimed model size - ''' + Function to get aimed model size + ''' mats = self.getVars() endW = self._num_W_matrices endU = endW + self._num_U_matrices From 87c174017655854e37220cc4453c11bd01fb2647 Mon Sep 17 00:00:00 2001 From: SachinG007 Date: Sun, 3 May 2020 14:18:56 +0000 Subject: [PATCH 4/6] cuda gpu number fix --- examples/pytorch/FastCells/fastcell_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pytorch/FastCells/fastcell_example.py b/examples/pytorch/FastCells/fastcell_example.py index 69a759c7e..6267fc0ae 100644 --- a/examples/pytorch/FastCells/fastcell_example.py +++ b/examples/pytorch/FastCells/fastcell_example.py @@ -11,7 +11,7 @@ def main(): # change cuda:0 to cuda:gpuid for specific allocation - device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu") + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Fixing seeds for reproducibility torch.manual_seed(42) np.random.seed(42) From fe862302e1b48ea68e5b7265234489c06232767a Mon Sep 17 00:00:00 2001 From: SachinG007 Date: Wed, 6 May 2020 20:44:02 +0000 Subject: [PATCH 5/6] PR173 merged, optimizer changes --- .../pytorch/FastCells/fastcell_example.py | 9 +++--- pytorch/edgeml_pytorch/trainer/fastTrainer.py | 31 ++++++++++--------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/examples/pytorch/FastCells/fastcell_example.py b/examples/pytorch/FastCells/fastcell_example.py index 6267fc0ae..91674894a 100644 --- a/examples/pytorch/FastCells/fastcell_example.py +++ b/examples/pytorch/FastCells/fastcell_example.py @@ -46,12 +46,13 @@ def main(): assert dataDimension % inputDims == 0, "Infeasible per step input, " + \ "Timesteps have to be integer" - timeSteps = int(Xtest.shape[1] / inputDims) - Xtest = np.reshape(Xtest, (-1, timeSteps, inputDims)) + timeSteps = int(dataDimension / inputDims) Xtrain = Xtrain.reshape((-1, timeSteps, inputDims)) - if not args.batch_first: - Xtest = np.swapaxes(Xtest, 0, 1) + Xtest = Xtest.reshape((-1, timeSteps, 
inputDims)) + + if not batch_first: Xtrain = np.swapaxes(Xtrain, 0, 1) + Xtest = np.swapaxes(Xtest, 0, 1) currDir = helpermethods.createTimeStampDir(dataDir, cell) diff --git a/pytorch/edgeml_pytorch/trainer/fastTrainer.py b/pytorch/edgeml_pytorch/trainer/fastTrainer.py index f5d7ad307..32c96b68d 100644 --- a/pytorch/edgeml_pytorch/trainer/fastTrainer.py +++ b/pytorch/edgeml_pytorch/trainer/fastTrainer.py @@ -9,6 +9,14 @@ from edgeml_pytorch.graph.rnn import * import numpy as np +class SimpleFC(nn.Module): + def __init__(self, input_size, num_classes, name="SimpleFC"): + super(SimpleFC, self).__init__() + self.FC = nn.Parameter(torch.randn([input_size, num_classes])) + self.FCbias = nn.Parameter(torch.randn([num_classes])) + + def forward(self, input): + return torch.matmul(input, self.FC) + self.FCbias class FastTrainer: @@ -50,23 +58,17 @@ def __init__(self, FastObj, numClasses, sW=1.0, sU=1.0, self.numMatrices = self.FastObj.num_weight_matrices self.totalMatrices = self.numMatrices[0] + self.numMatrices[1] - self.optimizer = self.optimizer() - self.RNN = BaseRNN(self.FastObj, batch_first=self.batch_first).to(self.device) - - self.FC = nn.Parameter(torch.randn( - [self.FastObj.output_size, self.numClasses])).to(self.device) - self.FCbias = nn.Parameter(torch.randn( - [self.numClasses])).to(self.device) - + self.simpleFC = SimpleFC(self.FastObj.output_size, self.numClasses).to(self.device) self.FastParams = self.FastObj.getVars() + self.optimizer = self.optimizer() def classifier(self, feats): ''' Can be raplaced by any classifier TODO: Make this a separate class if needed ''' - return torch.matmul(feats, self.FC) + self.FCbias + return self.simpleFC(feats) def computeLogits(self, input): ''' @@ -88,8 +90,9 @@ def optimizer(self): ''' Optimizer for FastObj Params ''' + paramList = list(self.FastObj.parameters()) + list(self.simpleFC.parameters()) optimizer = torch.optim.Adam( - self.FastObj.parameters(), lr=self.learningRate) + paramList, lr=self.learningRate) return optimizer @@ -171,12 +174,12 @@ def getModelSize(self): hasSparse = hasSparse or sparseFlag # Replace this with classifier class call - nnz, size, sparseFlag = utils.estimateNNZ(self.FC, 1.0) + nnz, size, sparseFlag = utils.estimateNNZ(self.simpleFC.FC, 1.0) totalnnZ += nnz totalSize += size hasSparse = hasSparse or sparseFlag - nnz, size, sparseFlag = utils.estimateNNZ(self.FCbias, 1.0) + nnz, size, sparseFlag = utils.estimateNNZ(self.simpleFC.FCbias, 1.0) totalnnZ += nnz totalSize += size hasSparse = hasSparse or sparseFlag @@ -344,8 +347,8 @@ def saveParams(self, currDir): np.save(os.path.join(currDir, "Bo.npy"), self.FastParams[self.totalMatrices + 3].data.cpu()) - np.save(os.path.join(currDir, "FC.npy"), self.FC.data.cpu()) - np.save(os.path.join(currDir, "FCbias.npy"), self.FCbias.data.cpu()) + np.save(os.path.join(currDir, "FC.npy"), self.simpleFC.FC.data.cpu()) + np.save(os.path.join(currDir, "FCbias.npy"), self.simpleFC.FCbias.data.cpu()) def train(self, batchSize, totalEpochs, Xtrain, Xtest, Ytrain, Ytest, decayStep, decayRate, dataDir, currDir): From 9b050b81a91d9190a697bbdd3a7078db2e25fd74 Mon Sep 17 00:00:00 2001 From: SachinG007 Date: Sun, 10 May 2020 15:56:19 +0000 Subject: [PATCH 6/6] Resolved few comments by aditya --- pytorch/edgeml_pytorch/graph/rnn.py | 49 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/pytorch/edgeml_pytorch/graph/rnn.py b/pytorch/edgeml_pytorch/graph/rnn.py index 988f7e495..ceed5a5e1 100644 --- a/pytorch/edgeml_pytorch/graph/rnn.py +++ 
b/pytorch/edgeml_pytorch/graph/rnn.py @@ -969,7 +969,7 @@ class BaseRNN(nn.Module): def __init__(self, cell: RNNCell, batch_first=False, cell_reverse: RNNCell=None, bidirectional=False): super(BaseRNN, self).__init__() - self.RNNCell = cell + self._RNNCell = cell self._batch_first = batch_first self._bidirectional = bidirectional if cell_reverse is not None: @@ -978,47 +978,50 @@ def __init__(self, cell: RNNCell, batch_first=False, cell_reverse: RNNCell=None, self.RNNCell_reverse = cell def getVars(self): - return self.RNNCell.getVars() + return self._RNNCell.getVars() def forward(self, input, hiddenState=None, cellState=None): self.device = input.device self.num_directions = 2 if self._bidirectional else 1 - # hidden - # for i in range(num_directions): + if self._bidirectional: + self.num_directions = 2 + else: + self.num_directions = 1 + hiddenStates = torch.zeros( [input.shape[0], input.shape[1], - self.RNNCell.output_size]).to(self.device) + self._RNNCell.output_size]).to(self.device) if self._bidirectional: hiddenStates_reverse = torch.zeros( [input.shape[0], input.shape[1], - self.RNNCell_reverse.output_size]).to(self.device) + self._RNNCell_reverse.output_size]).to(self.device) if hiddenState is None: hiddenState = torch.zeros( [self.num_directions, input.shape[0] if self._batch_first else input.shape[1], - self.RNNCell.output_size]).to(self.device) + self._RNNCell.output_size]).to(self.device) if self._batch_first is True: - if self.RNNCell.cellType == "LSTMLR": + if self._RNNCell.cellType == "LSTMLR": cellStates = torch.zeros( [input.shape[0], input.shape[1], - self.RNNCell.output_size]).to(self.device) + self._RNNCell.output_size]).to(self.device) if self._bidirectional: cellStates_reverse = torch.zeros( [input.shape[0], input.shape[1], - self.RNNCell_reverse.output_size]).to(self.device) + self._RNNCell_reverse.output_size]).to(self.device) if cellState is None: cellState = torch.zeros( - [self.num_directions, input.shape[0], self.RNNCell.output_size]).to(self.device) + [self.num_directions, input.shape[0], self._RNNCell.output_size]).to(self.device) for i in range(0, input.shape[1]): - hiddenState[0], cellState[0] = self.RNNCell( + hiddenState[0], cellState[0] = self._RNNCell( input[:, i, :], (hiddenState[0].clone(), cellState[0].clone())) hiddenStates[:, i, :] = hiddenState[0] cellStates[:, i, :] = cellState[0] if self._bidirectional: - hiddenState[1], cellState[1] = self.RNNCell_reverse( + hiddenState[1], cellState[1] = self._RNNCell_reverse( input[:, input.shape[1]-i-1, :], (hiddenState[1].clone(), cellState[1].clone())) hiddenStates_reverse[:, i, :] = hiddenState[1] cellStates_reverse[:, i, :] = cellState[1] @@ -1028,10 +1031,10 @@ def forward(self, input, hiddenState=None, return torch.cat([hiddenStates,hiddenStates_reverse],-1), torch.cat([cellStates,cellStates_reverse],-1) else: for i in range(0, input.shape[1]): - hiddenState[0] = self.RNNCell(input[:, i, :], hiddenState[0].clone()) + hiddenState[0] = self._RNNCell(input[:, i, :], hiddenState[0].clone()) hiddenStates[:, i, :] = hiddenState[0] if self._bidirectional: - hiddenState[1] = self.RNNCell_reverse( + hiddenState[1] = self._RNNCell_reverse( input[:, input.shape[1]-i-1, :], hiddenState[1].clone()) hiddenStates_reverse[:, i, :] = hiddenState[1] if not self._bidirectional: @@ -1039,24 +1042,24 @@ def forward(self, input, hiddenState=None, else: return torch.cat([hiddenStates,hiddenStates_reverse],-1) else: - if self.RNNCell.cellType == "LSTMLR": + if self._RNNCell.cellType == "LSTMLR": cellStates = torch.zeros( 
[input.shape[0], input.shape[1], - self.RNNCell.output_size]).to(self.device) + self._RNNCell.output_size]).to(self.device) if self._bidirectional: cellStates_reverse = torch.zeros( [input.shape[0], input.shape[1], - self.RNNCell_reverse.output_size]).to(self.device) + self._RNNCell_reverse.output_size]).to(self.device) if cellState is None: cellState = torch.zeros( - [self.num_directions, input.shape[1], self.RNNCell.output_size]).to(self.device) + [self.num_directions, input.shape[1], self._RNNCell.output_size]).to(self.device) for i in range(0, input.shape[0]): - hiddenState[0], cellState[0] = self.RNNCell( + hiddenState[0], cellState[0] = self._RNNCell( input[i, :, :], (hiddenState[0].clone(), cellState[0].clone())) hiddenStates[i, :, :] = hiddenState[0] cellStates[i, :, :] = cellState[0] if self._bidirectional: - hiddenState[1], cellState[1] = self.RNNCell_reverse( + hiddenState[1], cellState[1] = self._RNNCell_reverse( input[input.shape[0]-i-1, :, :], (hiddenState[1].clone(), cellState[1].clone())) hiddenStates_reverse[i, :, :] = hiddenState[1] cellStates_reverse[i, :, :] = cellState[1] @@ -1066,10 +1069,10 @@ def forward(self, input, hiddenState=None, return torch.cat([hiddenStates,hiddenStates_reverse],-1), torch.cat([cellStates,cellStates_reverse],-1) else: for i in range(0, input.shape[0]): - hiddenState[0] = self.RNNCell(input[i, :, :], hiddenState[0].clone()) + hiddenState[0] = self._RNNCell(input[i, :, :], hiddenState[0].clone()) hiddenStates[i, :, :] = hiddenState[0] if self._bidirectional: - hiddenState[1] = self.RNNCell_reverse( + hiddenState[1] = self._RNNCell_reverse( input[input.shape[0]-i-1, :, :], hiddenState[1].clone()) hiddenStates_reverse[i, :, :] = hiddenState[1] if not self._bidirectional:
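
Usage sketch (not part of the patches above): the series threads a batch_first flag through the FastCell example, FastTrainer and the RNN wrappers in pytorch/edgeml_pytorch/graph/rnn.py, and adds optional bidirectional / is_shared_bidirectional arguments to those wrappers. A minimal illustration of the batch-first path, with tensor sizes chosen arbitrarily for the example:

    import torch
    from edgeml_pytorch.graph.rnn import FastGRNN

    batch, timeSteps, inputDims, hiddenDims = 8, 10, 16, 32

    # batch-first input is [batch, timeSteps, inputDims]; the time-major
    # (batch_first=False) layout used elsewhere swaps the first two axes.
    x = torch.randn(batch, timeSteps, inputDims)

    rnn = FastGRNN(inputDims, hiddenDims, batch_first=True)
    feats = rnn(x)          # hidden state at every step: [batch, timeSteps, hiddenDims]
    last = feats[:, -1]     # last step, as computeLogits reads it when batch_first is set

When bidirectional is enabled, BaseRNN concatenates the forward and reverse hidden states along the last dimension, so a classifier fed from that output should expect 2 * hiddenDims features per step.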