diff --git a/LSTM/LSTMLayer.cpp b/LSTM/LSTMLayer.cpp index d51cd8c..dd6c3ac 100644 --- a/LSTM/LSTMLayer.cpp +++ b/LSTM/LSTMLayer.cpp @@ -3,10 +3,10 @@ using namespace NeuralNet; RNN_LSTM::RNN_LSTM(int numHiddenLayers0, int hiddenLayerInputDim0, - int hiddenLayerOutputDim0, int inputDim0, int outputDim0, + int hiddenLayerOutputDim0, int inputDim0, int outputDim0, double learningRate0, std::shared_ptr trainingX0, std::shared_ptr trainingY0): // netOutputLayer.actType = BaseLayer::tanh previously - netOutputLayer(new BaseLayer_LSTM(hiddenLayerOutputDim0, outputDim0, BaseLayer::linear)){ + netOutputLayer(new BaseLayer_LSTM(hiddenLayerOutputDim0, outputDim0, BaseLayer::tanh)){ // at beginning, we assume all the hidden layers have the same size, @@ -17,6 +17,7 @@ RNN_LSTM::RNN_LSTM(int numHiddenLayers0, int hiddenLayerInputDim0, rnnOutputDim = outputDim0; // this parameter is not used within sofar code trainingX = trainingX0; trainingY = trainingY0; + learningRate = learningRate0; @@ -246,7 +247,7 @@ void RNN_LSTM::backward() { netOutputLayer->clearAccuGrad(); - double learningRate = 0.005; + // double learningRate = 0.005; int T = trainingY->n_cols; for (int t = T - 1; t >= 0; t--){ diff --git a/LSTM/LSTMLayer.h b/LSTM/LSTMLayer.h index 60a116e..2106496 100644 --- a/LSTM/LSTMLayer.h +++ b/LSTM/LSTMLayer.h @@ -15,7 +15,7 @@ namespace NeuralNet { public: RNN_LSTM(int numHiddenLayers0, int hiddenLayerInputDim0, - int hiddenLayerOutputDim0, int inputDim0, int outputDim0, + int hiddenLayerOutputDim0, int inputDim0, int outputDim0, double learningRate0, std::shared_ptr trainingX0, std::shared_ptr trainingY0); void forward(); void backward(); @@ -34,7 +34,7 @@ namespace NeuralNet { std::shared_ptr trainingY, trainingX; int numHiddenLayers, hiddenLayerInputDim, hiddenLayerOutputDim; int rnnInputDim, rnnOutputDim; - + double learningRate; }; diff --git a/LSTM/Makefile b/LSTM/Makefile index c1988aa..ef071af 100644 --- a/LSTM/Makefile +++ b/LSTM/Makefile @@ -15,7 +15,7 @@ 
ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -lla CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) $(DEEPLEARNING_INCLUDE) -I/opt/boost/boost_1_57_0 -c -D__LINUX -DDEBUG -g3 LINKOPTFLAGS = -O3 -flto=4 -fwhole-program -march=native #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl -LINK_DL_FLAGS = $(DEEPLEARNING_PATH) $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldeeplearning -ldl #$(LINKOPTFLAGS) +LINK_DL_FLAGS = $(DEEPLEARNING_PATH) $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldeeplearning -ldl -lprotobuf#$(LINKOPTFLAGS) #LINKFLAGS = #ODIR=obj ODIR = diff --git a/LSTM/legacy/RNN.cpp b/LSTM/legacy/RNN.cpp new file mode 100644 index 0000000..1069049 --- /dev/null +++ b/LSTM/legacy/RNN.cpp @@ -0,0 +1,292 @@ +#include "RNN.h" + + +void RNN::forwardPass(std::shared_ptr inputX) { + /* + *Y_iGate = (*W_iGate_i) * (*X).st() + (*W_iGate_c) * (*Y_cPrev).st(); + for (int i = 0; i < Y_iGate->n_rows; i++) + Y_iGate->row(i) += (*B_iGate).st(); + Y_iGate->transform([](double val) { + return 1.0/(1+exp(-val)); + }); + + *Y_cCandit = (*W_c_i) * (*X) + (*W_c_c) * (*Y_cPrev); + for (int i = 0; i < Y_cCandit->n_rows; i++) + Y_cCandit->row(i) += (*B_c).st(); + Y_cCandit->transform([](double val) { + return tanh(val); + }); + + *Y_fGate = (*W_fGate_i) * (*X) + (*W_fGate_c) * (*Y_cPrev); + for (int i = 0; i < Y_fGate->n_rows; i++) + Y_fGate->row(i) += (*B_fGate).st(); + Y_fGate->transform([](double val) { + return 1.0/(1+exp(-val)); + }); + + + (*Y_oGate) = (*W_oGate_i) * (*X) + (*W_oGate_c) * (*Y_cPrev); + for (int i = 0; i < Y_oGate->n_rows; i++) + Y_oGate->row(i) += (*B_oGate).st(); + Y_oGate->transform([](double val) { + return 1.0/(1+exp(-val)); + }); + + *S_c = (*Y_iGate) % (*Y_cCandit) + (*Y_fGate) % (*S_cPrev); + *S_cPrev = *S_c; + *H_c = *S_c; + H_c->transform([](double val) { + return tanh(val); + }); + *Y_cPrev = *Y_c; + *Y_c = (*Y_oGate) % (*H_c); + + outLayer.activateUp(Y_c); + /* + *Y = (*H).st() * (*W_oh).st() ; + for (int i = 0; i < 
Y->n_rows; i++)
+        Y->row(i) += (*B_o).st();
+        (end of the old output-layer snippet; C++ block comments do not nest, so only the next line closes the comment)
+    */
+
+    /*
+    LSTM forward pass for one time step: computes the input/forget/output gates and the cell candidate from X, Y_cPrev and S_cPrev, then updates S_c, H_c and Y_c.
+    */
+
+    //kl forwardpass:
+    // initialize for time series recursion ??
+
+    // input gate
+    // Y_cPrev = ms(t) in kl deduction
+    *Y_iGate = (*W_iGate_x) * (*X).st() + (*W_iGate_Sc) * (*S_cPrev).st() + (*W_iGate_Yc)*(*Y_cPrev).st(); // net_inj(t) in kl deduction
+    for (int i = 0; i < Y_iGate->n_rows; i++)
+        Y_iGate->row(i) += (*B_iGate).st();
+    Y_iGate->transform([](double val) { // y_inj(t) in kl deduction
+        return 1.0/(1+exp(-val)); // f_inj takes sigmoid function
+    });
+
+    // cell candidate state
+    *Y_cCandit = (*W_Sc_x) * (*X).st() + (*W_Sc_Yc) * (*Y_cPrev).st(); // net_cjv(t) in kl deduction
+    for (int i = 0; i < Y_cCandit->n_rows; i++)
+        Y_cCandit->row(i) += (*B_c).st();
+    Y_cCandit->transform([](double val) { // g(net_cjv(t)) in kl deduction
+        return tanh(val); // g takes tanh function
+    });
+
+    // forget gate
+    *Y_fGate = (*W_fGate_x) * (*X).st() + (*W_fGate_Yc)*(*Y_cPrev).st() + (*W_fGate_Sc) * (*S_cPrev).st(); // net_phi(t) in kl deduction
+    for (int i = 0; i < Y_fGate->n_rows; i++)
+        Y_fGate->row(i) += (*B_fGate).st();
+    Y_fGate->transform([](double val) { // y_phi(t) in kl deduction
+        return 1.0/(1+exp(-val)); // f_phi_j in kl deduction
+    });
+
+    // output gate: same three inputs as the input/forget gates (x, previous cell output, previous cell state)
+    (*Y_oGate) = (*W_oGate_x) * (*X).st() + (*W_oGate_Yc) * (*Y_cPrev).st() + (*W_oGate_Sc) * (*S_cPrev).st(); // net_outj(t) in kl deduction
+    for (int i = 0; i < Y_oGate->n_rows; i++)
+        Y_oGate->row(i) += (*B_oGate).st();
+    Y_oGate->transform([](double val) { // y_outj(t) in kl deduction
+        return 1.0/(1+exp(-val)); // f_outj in kl deduction
+    });
+
+    // cell state based on input gate and forget gate
+    *S_c = (*Y_iGate) % (*Y_cCandit) + (*Y_fGate) % (*S_cPrev); // S_cjv(t) in kl deduction
+    *S_cPrev = *S_c; // NOTE(review): overwrites the previous cell state before backwardPass() reads S_cPrev - confirm this ordering is intended
+    *H_c = *S_c; // h(S_cjv(t)) in kl figure
+    H_c->transform([](double val) {
+        return tanh(val); // h takes tanh function
+    });
+
+    *Y_c = (*Y_oGate) % (*H_c); // Y_c = ms(t) in kl deduction
+    *Y_cPrev = *Y_c; // NOTE(review): same concern as S_cPrev above - backwardPass() also reads Y_cPrev
+
+
outLayer.activateUp(outputLayer.W * Y_c); // Yk = fk(net_k(t)) = fk(W_k_m * ms(t)) + /* + *Y = (*H).st() * (*W_oh).st() ; + for (int i = 0; i < Y->n_rows; i++) + Y->row(i) += (*B_o).st(); + */ +} + + +void RNN::backwardPass() { + + /* + arma::mat delta_output_temp = (*outLayer.outputY) - (*trainingY); + arma::vec delta_output = arma::sum(delta_output_temp,1); +// calcuate the derivatives respect to internal states + arma::mat H_cDeriv = (1 - H_c % H_c); + + arma::mat e_Sc = Y_oGate * (H_cDeriv) * (*outLayer.W) * delta_output; + + ds_iGate_i = ds_iGate_i_prev * Y_fGate + Y_cCandit * (Y_iGate)* (1-Y_iGate) * X; + ds_iGate_c = ds_iGate_c_prev * Y_fGate + Y_cCandit * (Y_iGate)* (1-Y_iGate) * Y_cPrev; + + ds_fGate_i = ds_fGate_i_prev * Y_fGate + S_cPrev * (Y_fGate)* (1-Y_fGate) * X; + ds_fGate_c = ds_fGate_c_prev * Y_fGate + S_cPrev * (Y_fGate)* (1-Y_fGate) * Y_cPrev; + + ds_c_i = ds_c_i_prev * Y_fGate + Y_iGate * (Y_cPrev)* (1-Y_cPrev) * X; + ds_c_c = ds_c_c_prev * Y_fGate + Y_iGate * (Y_cPrev)* (1-Y_cPrev) * Y_cPrev; + + + ds_bias = ds_bias_prev * Y_fGate; + + +// update the output weight and bias, same as MLP + *(outLayer.W) -= alpha * delta_output * (*outLayer.inputX); + *(outLayer.B) -= alpha * delta_output; + + +// now update the output gate Weight and bias +// W_oGate_i + deriv = Y_oGate % (1 - Y_oGate) + delta_oGate = deriv * (*H_c) * outLayer.W * delta_output; +// oGate's input is X + grad = delta_oGate * (*X) + *W_oGate_i -= alpha * grad; + *B_oGate -= alpha * delta_oGate; +// W_oGate_c + grad = delta_oGate * (*Y_cPrev); + *W_oGate_c -= alpha *grad; + +// update the in gate weight and bias +// W_iGate_i + grad = e_Sc * dS_iGate_i; + W_iGate_i -= alpha * grad; +// W_iGate_c + grad = e_Sc * ds_iGate_c; + W_iGate_c -= alpha * grad; + +// update the forget gate weight and bias +// W_fGate_i + grad = e_Sc * ds_fGate_i; + W_fGate_c -= alpha * grad; +// W_fGate_c + grad = e_Sc * ds_fGate_c; + W_fGate_c -= alpha * grad; +// update the cell weight and bias +// W_c_i + 
grad = e_Sc * ds_c_i;
+    W_c_i -= alpha * grad;
+// W_c_c
+    grad = e_Sc * ds_c_c;
+    W_c_c = alpha *grad;
+
+// update the cell bias
+    B_c -= e_Sc * ds_bias;
+*/
+
+    /*
+    LSTM backpropagation for one time step: forms the output error and the cell-state error e_Sc, updates the running derivatives ds_* of the cell state w.r.t. each weight block, then applies gradient steps scaled by alpha.
+    */
+    //kl backprop:
+    arma::mat delta_output_temp = (*outLayer.outputY) - (*trainingY);
+    arma::vec delta_output = arma::sum(delta_output_temp,1);
+// calculate the derivatives with respect to internal states
+    arma::mat H_cDeriv = (1 - (*H_c) % (*H_c)); // h'(S_cjv(t)) in kl deduction, tanh derivative
+
+    // internal state error
+    arma::mat e_Sc = (*Y_oGate) * (H_cDeriv) * (*outLayer.W) * delta_output; // e_scjv(t) in kl deduction P2, outputLayer.W is the W_km
+
+    // input gate weight derivative, including d_W_i_x, d_W_i_Yc(d_W_i_ms), d_W_i_Sc
+    *ds_iGate_x = (*ds_iGate_x_prev) * (*Y_fGate) + (*Y_cCandit) * (*Y_iGate)* (1-*Y_iGate) * (*X); // d_W_i_x
+    *ds_iGate_Yc = (*ds_iGate_Yc_prev) * (*Y_fGate) + (*Y_cCandit) * (*Y_iGate)* (1-*Y_iGate) * (*Y_cPrev); // d_W_i_Yc (input is the previous cell output)
+    *ds_iGate_Sc = (*ds_iGate_Sc_prev) * (*Y_fGate) + (*Y_cCandit) * (*Y_iGate)* (1-*Y_iGate) * (*S_cPrev); // d_W_i_Sc (input is the previous cell state)
+
+    // forget gate weight derivative, including d_W_f_x, d_W_f_Yc (d_W_f_ms), d_W_f_Sc
+    *ds_fGate_x = (*ds_fGate_x_prev) * (*Y_fGate) + (*S_cPrev) * (*Y_fGate)* (1-*Y_fGate) * (*X); // d_W_f_x
+    *ds_fGate_Yc = (*ds_fGate_Yc_prev) * (*Y_fGate) + (*S_cPrev) * (*Y_fGate)* (1-*Y_fGate) * (*Y_cPrev); // d_W_f_Yc
+    *ds_fGate_Sc = (*ds_fGate_Sc_prev) * (*Y_fGate) + (*S_cPrev) * (*Y_fGate)* (1-*Y_fGate) * (*S_cPrev); // d_W_f_Sc
+
+    // cell input weight derivative, including d_W_Sc_x, d_W_Sc_Yc
+    *ds_Sc_x = (*ds_Sc_x_prev) * (*Y_fGate) + (*Y_iGate) * (*Y_cPrev)* (1-*Y_cPrev) * (*X); // d_W_Sc_x; NOTE(review): forwardPass applies tanh to the candidate, so 1 - Y_cCandit^2 may be intended here - confirm
+    *ds_Sc_Yc = (*ds_Sc_Yc_prev) * (*Y_fGate) + (*Y_iGate) * (*Y_cPrev)* (1-*Y_cPrev) * (*Y_cPrev); // d_W_Sc_Yc; NOTE(review): same question as d_W_Sc_x
+
+
+    *ds_bias = (*ds_bias_prev) * (*Y_fGate);
+
+
+// update the output weight and bias, same as MLP
+    *(outLayer.W) -= alpha * delta_output * (*outLayer.inputX);
+    *(outLayer.B) -= alpha * delta_output;
+
+
+// now update the
output gate Weight and bias
+// W_oGate_x
+    arma::mat deriv = (*Y_oGate) % (1 - (*Y_oGate)); // sigmoid derivative of the output gate
+    arma::mat delta_oGate = deriv * (*H_c) * (*outLayer.W) * delta_output;
+// oGate's input is X
+    arma::mat grad = delta_oGate * (*X);
+    *W_oGate_x -= alpha * grad;
+    *B_oGate -= alpha * delta_oGate;
+// W_oGate_Yc
+    grad = delta_oGate * (*Y_cPrev);
+    *W_oGate_Yc -= alpha *grad;
+    // W_oGate_Sc
+    grad = delta_oGate * (*S_cPrev);
+    *W_oGate_Sc -= alpha *grad;
+
+// update the in gate weight and bias
+// W_iGate_x
+    grad = e_Sc * (*ds_iGate_x);
+    *W_iGate_x -= alpha * grad;
+// W_iGate_Yc
+    grad = e_Sc * (*ds_iGate_Yc);
+    *W_iGate_Yc -= alpha * grad;
+// W_iGate_Sc
+    grad = e_Sc * (*ds_iGate_Sc);
+    *W_iGate_Sc -= alpha * grad;
+
+// update the forget gate weight and bias
+// W_fGate_x
+    grad = e_Sc * (*ds_fGate_x);
+    *W_fGate_x -= alpha * grad;
+// W_fGate_Yc
+    grad = e_Sc * (*ds_fGate_Yc);
+    *W_fGate_Yc -= alpha * grad;
+// W_fGate_Sc
+    grad = e_Sc * (*ds_fGate_Sc);
+    *W_fGate_Sc -= alpha * grad;
+
+// update the cell weight and bias
+// W_Sc_x
+    grad = e_Sc * (*ds_Sc_x);
+    *W_Sc_x -= alpha * grad;
+// W_Sc_Yc
+    grad = e_Sc * (*ds_Sc_Yc);
+    *W_Sc_Yc -= alpha *grad; // gradient step, not an overwrite
+
+// update the cell bias (scaled by alpha like every other update)
+    *B_c -= alpha * e_Sc * (*ds_bias);
+
+}
+
+void RNN::train(){
+    /*
+    LSTM training: for each epoch, walks the data in mini-batches, runs feedForward/backProp on each batch and prints the accumulated squared error. NOTE(review): train(), feedForward, backProp and the trainingPara fields used here are not declared in RNN.h - confirm where they are meant to come from.
+    */
+    std::shared_ptr subInput = std::make_shared();
+    std::shared_ptr subInputY = std::make_shared();
+    std::shared_ptr delta = std::make_shared();
+    int ntimes;
+    double error, errorTotal;
+    int size = trainingPara.miniBatchSize;
+    for (int epoch = 0; epoch < trainingPara.NEpoch; epoch++) {
+        std::cout << epoch << std::endl;
+        ntimes = numInstance / trainingPara.miniBatchSize;
+        errorTotal = 0.0;
+        for (int i = 0; i < ntimes; i++) {
+            (*subInput) = trainingX->slices(i*size*nChanel,(i+1)*size*nChanel-1);
+            (*subInputY) = trainingY->rows(i*size,(i+1)*size-1);
+            feedForward(subInput);
+            // output->print();
+            (*delta) = ((*output) - (*subInputY)).st();
+            // subInputY->print();
+            backProp(delta);
+            error = arma::sum(arma::sum((*delta).st() * (*delta)));
+            errorTotal += error;
+        }
+        std::cout
<< errorTotal << std::endl; + } + +} diff --git a/LSTM/legacy/RNN.h b/LSTM/legacy/RNN.h new file mode 100644 index 0000000..b815abd --- /dev/null +++ b/LSTM/legacy/RNN.h @@ -0,0 +1,46 @@ +#include +#include +#include "../BaseLayer/BaseLayer.h" + + +struct InputLayer {}; +struct LSTM {}; +struct OutputLayer {}; + + +class RNN { + +public: + struct TrainingPara{}; + // methods + RNN(int inputDim0, int outputDim0, int numCells0, TrainingPara trainingPara0); + //void setTrainingData(std::shared_ptr trainingX0, std::shared_ptr trainingY0); + void initializeWeight(); + void forwardPass(); + void backwardPass(); + + // attributes + //internal states + std::shared_ptr Y_iGate, Y_cCandit, Y_fGate, Y_oGate, S_c, S_cPrev, H_c, Y_c; + // weights + std::shared_ptr W_iGate_x, W_iGate_Sc, W_iGate_Yc; + std::shared_ptr W_Sc_x, W_Sc_Yc; + std::shared_ptr W_fGate_x, W_fGate_Yc, W_fGate_Sc; + std::shared_ptr W_oGate_x, W_oGate_Yc, W_oGate_Sc; + // derivatives + std::shared_ptr ds_iGate_x, ds_iGate_Yc, ds_iGate_Sc, ds_iGate_x_prev, ds_iGate_Yc_prev, ds_iGate_Sc_prev; + std::shared_ptr ds_fGate_x, ds_fGate_Yc, ds_fGate_Sc, ds_fGate_x_prev, ds_fGate_Yc_prev, ds_fGate_Sc_prev; + std::shared_ptr ds_Sc_x, ds_Sc_Yc, ds_Sc_x_prev, ds_Sc_Yc_prev; + std::shared_ptr ds_bias, ds_bias_prev; + + std::shared_ptr U_ci, U_cf, U_cc, U_io, V_o; + std::shared_ptr trainingX, trainingY; + std::shared_ptr inputX; + std::shared_ptr H, C, H_prev; + std::shared_ptr B_igate, B_ogate, B_fgate; + int inputDim, outputDim, numCells; + BaseLayer outLayer; + + +}; + diff --git a/LSTM/main.cpp b/LSTM/main.cpp index 8d4fb24..e97f5d0 100644 --- a/LSTM/main.cpp +++ b/LSTM/main.cpp @@ -7,38 +7,134 @@ #include #include #include +#include #include "LSTMLayer.h" -//#include "common.h" +#include "common.h" using namespace NeuralNet; +using namespace DeepLearning; void workOnSequenceGeneration(std::shared_ptr trainingY); -void testForward(); -void trainRNN_LSTM(); -void testGrad(); -void testDynamics(); +void 
testForward(char* filename, NeuralNetParameter message); +void trainRNN_LSTM(char* filename, NeuralNetParameter message); +void testGrad(char* filename, NeuralNetParameter message); +void testDynamics(char* filename,NeuralNetParameter message); +void aLittleTimerGenerator(std::shared_ptr trainingX, + std::shared_ptr trainingY); -void genSimData(); // generate simulation data -double f_x(double t); int main(int argc, char *argv[]) { // testForward(); // workOnSequenceGeneration(); // testGrad(); // trainRNN_LSTM(); - testDynamics(); + //std::shared_ptr trainingX(new arma::mat(1,20)); + //std::shared_ptr trainingY(new arma::mat(1,20)); + //aLittleTimerGenerator(trainingX,trainingY); + NeuralNetParameter message; + if (argc < 2) { std::cerr << "usage: " << argv[0] << " NET_PROTOTXT" << std::endl; return 1; } testDynamics(argv[1], message); /* argv[1] is the prototxt path handed to ReadProtoFromTextFile; it is null when no argument is given */ return 0; } -// use LSTM to approximate a dynamical system -void testDynamics(){ +void aLittleTimerGenerator(std::shared_ptr trainingX, + std::shared_ptr trainingY){ + + int T = trainingY->n_elem; + + std::random_device device; + std::mt19937 gen(device()); + std::bernoulli_distribution distribution(0.1); + std::uniform_real_distribution<> dis(0, 1); + + arma::mat input(2, T); + arma::mat output(1, T); + + + for(int i=0;i trainingX(new arma::mat(1,10)); - std::shared_ptr trainingY(new arma::mat(1,10)); + //NeuralNetParameter message; + ReadProtoFromTextFile(filename, &message); + int seriesLength = message.rnnstruct().timeserieslength(); + std::cout << seriesLength << std::endl; + + std::shared_ptr trainingX(new arma::mat(1,seriesLength)); + std::shared_ptr trainingY(new arma::mat(1,seriesLength)); + /* // initialize trainingX->zeros(); trainingY->at(0) = 0.9999; @@ -52,15 +148,21 @@ void testDynamics(){ // trainingY->at(i) = sin(i); } + */ + aLittleTimerGenerator(trainingX, trainingY); + + int iterations = message.neuralnettrainingparameter().nepoch(); - int iterations = 5000; - /* RNN constructor parameters passed as: RNN(int numHiddenLayers0, int hiddenLayerInputDim0, - int hiddenLayerOutputDim0, int inputDim0, int
outputDim0, + int hiddenLayerOutputDim0, int inputDim0, int outputDim0, double learningRate, std::shared_ptr trainingX0, std::shared_ptr trainingY0) */ - RNN_LSTM lstm(3, 8, 8, 1, 1, trainingX, trainingY); + RNN_LSTM lstm(message.rnnstruct().numhiddenlayers(), message.rnnstruct().hiddenlayerinputdim(), + message.rnnstruct().hiddenlayeroutputdim(), message.rnnstruct().inputdim(), + message.rnnstruct().outputdim(), message.neuralnettrainingparameter().learningrate(), + trainingX, trainingY); + // train the LSTM model by iterations for (int iter = 0; iter < iterations; iter++) { lstm.train(); @@ -77,8 +179,10 @@ void testDynamics(){ // test the gradients by numerical gradients checking -void testGrad() { +void testGrad(char* filename, NeuralNetParameter message) { + ReadProtoFromTextFile(filename, &message); + std::shared_ptr trainingX(new arma::mat(1, 10)); trainingX->randu(1, 10); std::shared_ptr trainingY(new arma::mat()); @@ -89,11 +193,14 @@ void testGrad() { int hiddenLayerOutputDim0, int inputDim0, int outputDim0, std::shared_ptr trainingX0, std::shared_ptr trainingY0) */ - RNN_LSTM rnn(3, 2, 2, 1, 1, trainingX, trainingY); + RNN_LSTM lstm(message.rnnstruct().numhiddenlayers(), message.rnnstruct().hiddenlayerinputdim(), + message.rnnstruct().hiddenlayeroutputdim(), message.rnnstruct().inputdim(), + message.rnnstruct().outputdim(), message.neuralnettrainingparameter().learningrate(), + trainingX, trainingY); // before applying the LSTM backprop model, generate numerical gradients by just forward pass. 
- rnn.calNumericGrad(); + lstm.calNumericGrad(); // train the LSTM model by one iteration to generate gradient from the model - rnn.train(); + lstm.train(); } @@ -104,8 +211,10 @@ void workOnSequenceGeneration(std::shared_ptr trainingY) { trainingY->print(); } -void testForward() { +void testForward(char* filename, NeuralNetParameter message) { + ReadProtoFromTextFile(filename, &message); + std::shared_ptr trainingX(new arma::mat()); trainingX->randn(1, 10); std::shared_ptr trainingY(new arma::mat()); @@ -114,14 +223,19 @@ void testForward() { // int hiddenLayerOutputDim0, int inputDim0, int outputDim0, // std::shared_ptr trainingX0, std::shared_ptr trainingY0) - RNN_LSTM rnn(1, 2, 2, 1, 1, trainingX, trainingY); - rnn.forward(); - rnn.backward(); + RNN_LSTM lstm(message.rnnstruct().numhiddenlayers(), message.rnnstruct().hiddenlayerinputdim(), + message.rnnstruct().hiddenlayeroutputdim(), message.rnnstruct().inputdim(), + message.rnnstruct().outputdim(), message.neuralnettrainingparameter().learningrate(), + trainingX, trainingY); + lstm.forward(); + lstm.backward(); } -void trainRNN_LSTM() { - +void trainRNN_LSTM(char* filename, NeuralNetParameter message) { + + ReadProtoFromTextFile(filename, &message); + std::shared_ptr trainingX(new arma::mat()); std::shared_ptr trainingY(new arma::mat()); @@ -148,7 +262,10 @@ void trainRNN_LSTM() { int hiddenLayerOutputDim0, int inputDim0, int outputDim0, std::shared_ptr trainingX0, std::shared_ptr trainingY0) */ - RNN_LSTM lstm(4, 8, 8, 1, 1, trainingX, trainingY); + RNN_LSTM lstm(message.rnnstruct().numhiddenlayers(), message.rnnstruct().hiddenlayerinputdim(), + message.rnnstruct().hiddenlayeroutputdim(), message.rnnstruct().inputdim(), + message.rnnstruct().outputdim(), message.neuralnettrainingparameter().learningrate(), + trainingX, trainingY); // train the LSTM model by iterations for (int iter = 0; iter < iterations; iter++) { lstm.train(); @@ -160,29 +277,3 @@ void trainRNN_LSTM() { } } -/*void 
genSimData(std::shared_ptr trainingX){ - - int TotalLength = 10; - double mean = 0; - double max_abs = 0; - for (int i = 0; i < TotalLength; ++i) { - double val = f_x(i * 0.01); - max_abs = max(max_abs, abs(val)); - } - for (int i = 0; i < TotalLength; ++i) { - mean += f_x(i * 0.01) / max_abs; - } - mean /= TotalLength; - for (int i = 0; i < TotalLength; ++i) { - trainingX[i] = f_x(i * 0.01) / max_abs - mean; - } - - -} - -double f_x(double t) { - - return 0.5 * sin(2 * t) - 0.05 * cos(17 * t + 0.8) - + 0.05 * sin(25 * t + 10) - 0.02 * cos(45 * t + 0.3); - -}*/ \ No newline at end of file diff --git a/LSTM/net.prototxt b/LSTM/net.prototxt new file mode 100644 index 0000000..138f204 --- /dev/null +++ b/LSTM/net.prototxt @@ -0,0 +1,28 @@ +layerStruct{ + name: "BaseLayer1" + inputDim: 10 + outputDim: 20 + activationType: sigmoid +} + +layerStruct{ + name: "BaseLayer2" + inputDim: 10 + outputDim: 20 + activationType: sigmoid +} + +neuralNetTrainingParameter{ + learningRate: 0.0001 + miniBatchSize: 10 + NEpoch: 5000 +} + +rnnStruct{ + numHiddenLayers: 3 + hiddenLayerInputDim: 8 + hiddenLayerOutputDim: 8 + inputDim: 2 + outputDim: 1 + timeSeriesLength : 1000 +} diff --git a/LSTM/test b/LSTM/test new file mode 100755 index 0000000..38377bd Binary files /dev/null and b/LSTM/test differ diff --git a/RNN/test b/RNN/test new file mode 100755 index 0000000..5e23583 Binary files /dev/null and b/RNN/test differ diff --git a/include/DeepLearning.pb.h b/include/DeepLearning.pb.h index 1bbf1e6..777fd79 100644 --- a/include/DeepLearning.pb.h +++ b/include/DeepLearning.pb.h @@ -460,6 +460,13 @@ class RNNStructParameter : public ::google::protobuf::Message { inline ::google::protobuf::int32 outputdim() const; inline void set_outputdim(::google::protobuf::int32 value); + // optional int32 timeSeriesLength = 6; + inline bool has_timeserieslength() const; + inline void clear_timeserieslength(); + static const int kTimeSeriesLengthFieldNumber = 6; + inline ::google::protobuf::int32 
timeserieslength() const; + inline void set_timeserieslength(::google::protobuf::int32 value); + // @@protoc_insertion_point(class_scope:DeepLearning.RNNStructParameter) private: inline void set_has_numhiddenlayers(); @@ -472,6 +479,8 @@ class RNNStructParameter : public ::google::protobuf::Message { inline void clear_has_inputdim(); inline void set_has_outputdim(); inline void clear_has_outputdim(); + inline void set_has_timeserieslength(); + inline void clear_has_timeserieslength(); ::google::protobuf::UnknownFieldSet _unknown_fields_; @@ -482,6 +491,7 @@ class RNNStructParameter : public ::google::protobuf::Message { ::google::protobuf::int32 hiddenlayeroutputdim_; ::google::protobuf::int32 inputdim_; ::google::protobuf::int32 outputdim_; + ::google::protobuf::int32 timeserieslength_; friend void protobuf_AddDesc_DeepLearning_2eproto(); friend void protobuf_AssignDesc_DeepLearning_2eproto(); friend void protobuf_ShutdownFile_DeepLearning_2eproto(); @@ -1286,6 +1296,30 @@ inline void RNNStructParameter::set_outputdim(::google::protobuf::int32 value) { // @@protoc_insertion_point(field_set:DeepLearning.RNNStructParameter.outputDim) } +// optional int32 timeSeriesLength = 6; +inline bool RNNStructParameter::has_timeserieslength() const { + return (_has_bits_[0] & 0x00000020u) != 0; +} +inline void RNNStructParameter::set_has_timeserieslength() { + _has_bits_[0] |= 0x00000020u; +} +inline void RNNStructParameter::clear_has_timeserieslength() { + _has_bits_[0] &= ~0x00000020u; +} +inline void RNNStructParameter::clear_timeserieslength() { + timeserieslength_ = 0; + clear_has_timeserieslength(); +} +inline ::google::protobuf::int32 RNNStructParameter::timeserieslength() const { + // @@protoc_insertion_point(field_get:DeepLearning.RNNStructParameter.timeSeriesLength) + return timeserieslength_; +} +inline void RNNStructParameter::set_timeserieslength(::google::protobuf::int32 value) { + set_has_timeserieslength(); + timeserieslength_ = value; + // 
@@protoc_insertion_point(field_set:DeepLearning.RNNStructParameter.timeSeriesLength) +} + // ------------------------------------------------------------------- // NeuralNetTrainingParameter diff --git a/src/DeepLearning.pb.cc b/src/DeepLearning.pb.cc index 9d2ef80..1f77229 100644 --- a/src/DeepLearning.pb.cc +++ b/src/DeepLearning.pb.cc @@ -84,12 +84,13 @@ void protobuf_AssignDesc_DeepLearning_2eproto() { sizeof(LayerStructParameter)); LayerStructParameter_ActivationType_descriptor_ = LayerStructParameter_descriptor_->enum_type(0); RNNStructParameter_descriptor_ = file->message_type(2); - static const int RNNStructParameter_offsets_[5] = { + static const int RNNStructParameter_offsets_[6] = { GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(RNNStructParameter, numhiddenlayers_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(RNNStructParameter, hiddenlayerinputdim_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(RNNStructParameter, hiddenlayeroutputdim_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(RNNStructParameter, inputdim_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(RNNStructParameter, outputdim_), + GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(RNNStructParameter, timeserieslength_), }; RNNStructParameter_reflection_ = new ::google::protobuf::internal::GeneratedMessageReflection( @@ -179,18 +180,19 @@ void protobuf_AddDesc_DeepLearning_2eproto() { "1.DeepLearning.LayerStructParameter.Acti" "vationType\022\014\n\004name\030\004 \001(\t\022\014\n\004type\030\005 \001(\t\"@" "\n\016ActivationType\022\013\n\007sigmoid\020\001\022\010\n\004tanh\020\002\022" - "\n\n\006linear\020\003\022\013\n\007softmax\020\004\"\215\001\n\022RNNStructPa" + "\n\n\006linear\020\003\022\013\n\007softmax\020\004\"\247\001\n\022RNNStructPa" "rameter\022\027\n\017numHiddenLayers\030\001 \001(\005\022\033\n\023hidd" "enLayerInputDim\030\002 \001(\005\022\034\n\024hiddenLayerOutp" "utDim\030\003 \001(\005\022\020\n\010inputDim\030\004 \001(\005\022\021\n\toutputD" - "im\030\005 
\001(\005\"\240\002\n\032NeuralNetTrainingParameter\022" - "\024\n\014learningRate\030\001 \001(\001\022\017\n\007maxIter\030\002 \001(\005\022\025" - "\n\rminiBatchSize\030\003 \001(\005\022\016\n\006NEpoch\030\004 \001(\005\022\022\n" - "\003epi\030\005 \001(\001:\0051e-06\022N\n\013trainerType\030\006 \001(\01624" - ".DeepLearning.NeuralNetTrainingParameter" - ".TrainerType:\003SGD\022\025\n\tdecayRate\030\007 \001(\001:\00210" - "\022\025\n\010momentum\030\010 \001(\001:\0030.9\"\"\n\013TrainerType\022\007" - "\n\003SGD\020\001\022\n\n\006iRProp\020\002", 939); + "im\030\005 \001(\005\022\030\n\020timeSeriesLength\030\006 \001(\005\"\240\002\n\032N" + "euralNetTrainingParameter\022\024\n\014learningRat" + "e\030\001 \001(\001\022\017\n\007maxIter\030\002 \001(\005\022\025\n\rminiBatchSiz" + "e\030\003 \001(\005\022\016\n\006NEpoch\030\004 \001(\005\022\022\n\003epi\030\005 \001(\001:\0051e" + "-06\022N\n\013trainerType\030\006 \001(\01624.DeepLearning." + "NeuralNetTrainingParameter.TrainerType:\003" + "SGD\022\025\n\tdecayRate\030\007 \001(\001:\00210\022\025\n\010momentum\030\010" + " \001(\001:\0030.9\"\"\n\013TrainerType\022\007\n\003SGD\020\001\022\n\n\006iRP" + "rop\020\002", 965); ::google::protobuf::MessageFactory::InternalRegisterGeneratedFile( "DeepLearning.proto", &protobuf_RegisterTypes); NeuralNetParameter::default_instance_ = new NeuralNetParameter(); @@ -1115,6 +1117,7 @@ const int RNNStructParameter::kHiddenLayerInputDimFieldNumber; const int RNNStructParameter::kHiddenLayerOutputDimFieldNumber; const int RNNStructParameter::kInputDimFieldNumber; const int RNNStructParameter::kOutputDimFieldNumber; +const int RNNStructParameter::kTimeSeriesLengthFieldNumber; #endif // !_MSC_VER RNNStructParameter::RNNStructParameter() @@ -1140,6 +1143,7 @@ void RNNStructParameter::SharedCtor() { hiddenlayeroutputdim_ = 0; inputdim_ = 0; outputdim_ = 0; + timeserieslength_ = 0; ::memset(_has_bits_, 0, sizeof(_has_bits_)); } @@ -1185,8 +1189,8 @@ void 
RNNStructParameter::Clear() { ::memset(&first, 0, n); \ } while (0) - if (_has_bits_[0 / 32] & 31) { - ZR_(numhiddenlayers_, outputdim_); + if (_has_bits_[0 / 32] & 63) { + ZR_(numhiddenlayers_, timeserieslength_); } #undef OFFSET_OF_FIELD_ @@ -1276,6 +1280,21 @@ bool RNNStructParameter::MergePartialFromCodedStream( } else { goto handle_unusual; } + if (input->ExpectTag(48)) goto parse_timeSeriesLength; + break; + } + + // optional int32 timeSeriesLength = 6; + case 6: { + if (tag == 48) { + parse_timeSeriesLength: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, ×erieslength_))); + set_has_timeserieslength(); + } else { + goto handle_unusual; + } if (input->ExpectAtEnd()) goto success; break; } @@ -1330,6 +1349,11 @@ void RNNStructParameter::SerializeWithCachedSizes( ::google::protobuf::internal::WireFormatLite::WriteInt32(5, this->outputdim(), output); } + // optional int32 timeSeriesLength = 6; + if (has_timeserieslength()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(6, this->timeserieslength(), output); + } + if (!unknown_fields().empty()) { ::google::protobuf::internal::WireFormat::SerializeUnknownFields( unknown_fields(), output); @@ -1365,6 +1389,11 @@ ::google::protobuf::uint8* RNNStructParameter::SerializeWithCachedSizesToArray( target = ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(5, this->outputdim(), target); } + // optional int32 timeSeriesLength = 6; + if (has_timeserieslength()) { + target = ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(6, this->timeserieslength(), target); + } + if (!unknown_fields().empty()) { target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray( unknown_fields(), target); @@ -1412,6 +1441,13 @@ int RNNStructParameter::ByteSize() const { this->outputdim()); } + // optional int32 timeSeriesLength = 6; + if (has_timeserieslength()) { + 
total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->timeserieslength()); + } + } if (!unknown_fields().empty()) { total_size += @@ -1454,6 +1490,9 @@ void RNNStructParameter::MergeFrom(const RNNStructParameter& from) { if (from.has_outputdim()) { set_outputdim(from.outputdim()); } + if (from.has_timeserieslength()) { + set_timeserieslength(from.timeserieslength()); + } } mutable_unknown_fields()->MergeFrom(from.unknown_fields()); } @@ -1482,6 +1521,7 @@ void RNNStructParameter::Swap(RNNStructParameter* other) { std::swap(hiddenlayeroutputdim_, other->hiddenlayeroutputdim_); std::swap(inputdim_, other->inputdim_); std::swap(outputdim_, other->outputdim_); + std::swap(timeserieslength_, other->timeserieslength_); std::swap(_has_bits_[0], other->_has_bits_[0]); _unknown_fields_.Swap(&other->_unknown_fields_); std::swap(_cached_size_, other->_cached_size_); diff --git a/src/Proto/DeepLearning.proto b/src/Proto/DeepLearning.proto index 9ec87e6..b3b9ac3 100644 --- a/src/Proto/DeepLearning.proto +++ b/src/Proto/DeepLearning.proto @@ -24,6 +24,7 @@ message RNNStructParameter{ optional int32 hiddenLayerOutputDim = 3; optional int32 inputDim = 4; optional int32 outputDim = 5; + optional int32 timeSeriesLength = 6; }