diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java
index ab0dd37c..15a28665 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java
@@ -880,53 +880,57 @@ public void run() {
if (!lce.onlyCodeGeneration) {
// If there's a "from" clause, train.
try {
- if (lce.parser != null) {
- System.out.println("Training " + getName());
- if (preExtract) {
- preExtractAndPrune();
- System.gc();
- } else
- learner.saveLexicon();
- int trainingRounds = 1;
-
- if (tuningParameters) {
- String parametersPath = getName();
- if (Main.classDirectory != null)
- parametersPath =
- Main.classDirectory + File.separator + parametersPath;
- parametersPath += ".p";
-
- Learner.Parameters bestParameters = tune();
- trainingRounds = bestParameters.rounds;
- Learner.writeParameters(bestParameters, parametersPath);
- System.out.println(" " + getName()
- + ": Training on entire training set");
- } else {
- if (lce.rounds != null)
- trainingRounds = Integer.parseInt(((Constant) lce.rounds).value);
-
- if (lce.K != null) {
- int[] rounds = {trainingRounds};
- int k = Integer.parseInt(lce.K.value);
- double alpha = Double.parseDouble(lce.alpha.value);
- trainer.crossValidation(rounds, k, lce.splitPolicy, alpha,
- testingMetric, true);
+ learner.beginTraining();
+ try {
+ if (lce.parser != null) {
+ System.out.println("Training " + getName());
+ if (preExtract) {
+ preExtractAndPrune();
+ System.gc();
+ } else
+ learner.saveLexicon();
+ int trainingRounds = 1;
+
+ if (tuningParameters) {
+ String parametersPath = getName();
+ if (Main.classDirectory != null)
+ parametersPath =
+ Main.classDirectory + File.separator + parametersPath;
+ parametersPath += ".p";
+
+ Learner.Parameters bestParameters = tune();
+ trainingRounds = bestParameters.rounds;
+ Learner.writeParameters(bestParameters, parametersPath);
System.out.println(" " + getName()
+ ": Training on entire training set");
+ } else {
+ if (lce.rounds != null)
+ trainingRounds = Integer.parseInt(((Constant) lce.rounds).value);
+
+ if (lce.K != null) {
+ int[] rounds = {trainingRounds};
+ int k = Integer.parseInt(lce.K.value);
+ double alpha = Double.parseDouble(lce.alpha.value);
+ trainer.crossValidation(rounds, k, lce.splitPolicy, alpha,
+ testingMetric, true);
+ System.out.println(" " + getName()
+ + ": Training on entire training set");
+ }
}
- }
-
- trainer.train(lce.startingRound, trainingRounds);
-
- if (testParser != null) {
- System.out.println("Testing " + getName());
- new Accuracy(true).test(learner, learner.getLabeler(), testParser);
- }
-
- System.out.println("Writing " + getName());
- } else
- learner.saveLexicon(); // Writes .lex even if lexicon is empty.
-
+ trainer.train(lce.startingRound, trainingRounds);
+ } else
+ learner.saveLexicon(); // Writes .lex even if lexicon is empty.
+ } finally {
+ learner.doneTraining();
+ }
+
+ if (lce.parser != null && testParser != null) {
+ System.out.println("Testing " + getName());
+ new Accuracy(true).test(learner, learner.getLabeler(), testParser);
+ }
+
+ // save the final model.
+ System.out.println("Writing " + getName());
learner.save(); // Doesn't write .lex if lexicon is empty.
} catch (Exception e) {
System.err.println("LBJava ERROR: Exception while training " + getName() + ":");
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
index 5705301a..1728143b 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
@@ -66,6 +66,9 @@ public abstract class Learner extends Classifier {
/** The number of candidate examples when a global object is passed here. */
protected int candidates = 1;
+
+ /** True while a training session (between beginTraining and doneTraining) is in progress. */
+ protected boolean intraining = false;
/**
* This constructor is used by the LBJava compiler; it should never be called by a programmer.
@@ -259,7 +262,6 @@ public URL getModelLocation() {
return lcFilePath;
}
-
/**
* Sets the location of the lexicon as a regular file on this file system.
*
@@ -289,7 +291,6 @@ public URL getLexiconLocation() {
return lexFilePath;
}
-
/**
* Establishes a new feature counting policy for this learner's lexicon.
*
@@ -304,7 +305,6 @@ public void countFeatures(Lexicon.CountPolicy policy) {
lexicon.countFeatures(policy);
}
-
/**
* Returns this learner's feature lexicon after discarding any feature counts it may have been
* storing. This method is likely only useful when the lexicon and its counts are currently
@@ -320,7 +320,6 @@ public Lexicon getLexiconDiscardCounts() {
return lexicon;
}
-
/**
* Returns a new, emtpy learner into which all of the parameters that control the behavior of
* the algorithm have been copied. Here, "emtpy" means no learning has taken place.
@@ -331,7 +330,6 @@ public Learner emptyClone() {
return clone;
}
-
/**
* Trains the learning algorithm given an object as an example. By default, this simply converts
* the example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])}
@@ -345,7 +343,6 @@ public void learn(Object example) {
(double[]) exampleArray[3]);
}
-
/**
* Trains the learning algorithm given a feature vector as an example. This simply converts the
* example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])}.
@@ -633,6 +630,15 @@ public double realValue(int[] f, double[] v) {
+ getClass().getName() + "'.");
}
+ /**
+ * Begin a training session. Training may involve building many models,
+ * e.g. for cross validation or parameter tuning.
+ **/
+ public void beginTraining() {
+ intraining = true;
+ }
+
+
/**
* Overridden by subclasses to perform any required post-processing computations after all
@@ -642,6 +648,21 @@ public double realValue(int[] f, double[] v) {
public void doneLearning() {}
+ /**
+ * Overridden by subclasses to perform any required post-training computations and optimizations,
+ * in particular, feature subset reduction. This default method only enforces the training lifecycle.
+ */
+ public void doneTraining() {
+ if (intraining) {
+ intraining = false;
+ } else {
+ throw new RuntimeException("calling doneTraining without previously calling beginTraining"
+ + " violates the lifecycle contract. Or perhaps the subclass does not call the superclass "
+ + "method. Contact the developer.");
+ }
+ }
+
+
/**
* This method is sometimes called before training begins, although it is not guaranteed to be
* called at all. It allows the number of examples and number of features to be passed to the
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java
index 1941b64f..bb55b6a7 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java
@@ -9,12 +9,15 @@
import java.io.Serializable;
import java.net.URL;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.*;
+import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature;
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature;
import edu.illinois.cs.cogcomp.lbjava.util.ByteString;
import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -305,7 +308,7 @@ public boolean contains(Feature f) {
*
* @param f The feature to look up.
* @return The integer key that the feature maps to.
- **/
+ **/
public int lookup(Feature f) {
return lookup(f, false, -1);
}
@@ -661,6 +664,36 @@ public void discardPrunedFeatures() {
pruneCutoff = -1;
}
+ /**
+ * Discard features at the provided indices. This operation is performed
+ * last to first so we can do it in place. This method will sort the input
+ * array.
+ * @param dumpthese the indexes of the features to dump.
+ */
+ public void discardPrunedFeatures(int [] dumpthese) {
+ Arrays.sort(dumpthese);
+ lexiconInv.remove(dumpthese);
+
+ // this compresses the FVector
+ lexiconInv = new FVector(lexiconInv);
+ if (lexicon != null) {
+
+ // reconstitute the lexicon.
+ lexicon.clear();
+ for (int i = 0; i < lexiconInv.size();i++) {
+ lexicon.put(lexiconInv.get(i), new Integer(i));
+ }
+
+ // sanity check, make sure the indices in the lexicon map matches the index in the feature vector
+ for (int i = 0; i < lexiconInv.size();i++) {
+ if (i != ((Integer)lexicon.get(lexiconInv.get(i))).intValue()) {
+ throw new RuntimeException("After optimization pruning, the index in the lexicon did "
+ + "not match the inverted index.");
+ }
+ }
+ }
+ }
+
/**
* Returns a deep clone of this lexicon implemented as a HashMap.
@@ -742,10 +775,9 @@ public int compare(int i1, int i2) {
ByteString previousBSIdentifier = null;
out.writeInt(indexes.length);
out.writeInt(pruneCutoff);
-
for (int i = 0; i < indexes.length; ++i) {
Feature f = inverse.get(indexes[i]);
- previousClassName =
+ previousClassName =
f.lexWrite(out, this, previousClassName, previousPackage, previousClassifier,
previousSIdentifier, previousBSIdentifier);
previousPackage = f.getPackage();
@@ -757,7 +789,6 @@ else if (f.hasByteStringIdentifier())
out.writeInt(indexes[i]);
}
-
if (featureCounts == null)
out.writeInt(0);
else
@@ -801,14 +832,12 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
pruneCutoff = in.readInt();
lexicon = null;
lexiconInv = new FVector(N);
-
for (int i = 0; i < N; ++i) {
Feature f =
Feature.lexReadFeature(in, this, previousClass, previousPackage,
previousClassifier, previousSIdentifier, previousBSIdentifier);
int index = in.readInt();
lexiconInv.set(index, f);
-
previousClass = f.getClass();
previousPackage = f.getPackage();
previousClassifier = f.getGeneratingClassifier();
@@ -817,7 +846,7 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
else if (f.hasByteStringIdentifier())
previousBSIdentifier = f.getByteStringIdentifier();
}
-
+
if (readCounts) {
featureCounts = new IVector();
featureCounts.read(in);
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
index 106bb475..e0abd3ae 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
@@ -16,6 +16,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LinearThresholdUnitOptimizer;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -57,6 +58,7 @@
*
* @author Nick Rizzolo
**/
+@SuppressWarnings("serial")
public abstract class LinearThresholdUnit extends Learner {
/** Default for {@link #initialWeight}. */
public static final double defaultInitialWeight = 0;
@@ -68,6 +70,8 @@ public abstract class LinearThresholdUnit extends Learner {
public static final double defaultLearningRate = 0.1;
/** Default for {@link #weightVector}. */
public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+ /** Any weight whose magnitude is less than this is considered irrelevant. This is for pruning. */
+ public static final double defaultFeaturePruningThreshold = 0.000001;
/**
* The rate at which weights are updated; default {@link #defaultLearningRate}.
@@ -100,6 +104,8 @@ public abstract class LinearThresholdUnit extends Learner {
protected double negativeThickness;
/** The label producing classifier's allowable values. */
protected String[] allowableValues;
+ /** Weights whose magnitude falls below this threshold are pruned. */
+ public double featurePruningThreshold;
/**
* Default constructor. The learning rate and threshold take default values, while the name of
@@ -159,6 +165,21 @@ public LinearThresholdUnit(double r, double t, double pt, double nt) {
this("", r, t, pt, nt);
}
+ /**
+ * Use this constructor to fit a thick separator, where the positive and negative sides of the
+ * hyperplane will be given the specified separate thicknesses, while the name of the classifier
+ * gets the empty string.
+ *
+ * @param r The desired learning rate value.
+ * @param t The desired threshold value.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
+ */
+ public LinearThresholdUnit(double r, double t, double pt, double nt, double fpt) {
+ this("", r, t, pt, nt, fpt);
+ }
+
/**
* Initializing constructor. Sets the threshold, positive thickness, and negative thickness to
* their default values.
@@ -217,7 +238,21 @@ protected LinearThresholdUnit(String n, double r, double t, double pt) {
* @param nt The desired negative thickness.
**/
protected LinearThresholdUnit(String n, double r, double t, double pt, double nt) {
- this(n, r, t, pt, nt, (SparseWeightVector) defaultWeightVector.clone());
+ this(n, r, t, pt, nt, defaultFeaturePruningThreshold);
+ }
+
+ /**
+ * Takes the rate, threshold, positive thickness, negative thickness, and an initial weight vector.
+ *
+ * @param n The name of the classifier.
+ * @param r The desired learning rate.
+ * @param t The desired value for the threshold.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param v An initial weight vector.
+ **/
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, SparseWeightVector v) {
+ this(n, r, t, pt, nt, defaultFeaturePruningThreshold, v);
}
/**
@@ -229,9 +264,25 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt
* @param t The desired value for the threshold.
* @param pt The desired positive thickness.
* @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
+ **/
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt) {
+ this(n, r, t, pt, nt, fpt, (SparseWeightVector) defaultWeightVector.clone());
+ }
+
+ /**
+ * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to
+ * the specified values.
+ *
+ * @param n The name of the classifier.
+ * @param r The desired learning rate.
+ * @param t The desired value for the threshold.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
* @param v An initial weight vector.
**/
- protected LinearThresholdUnit(String n, double r, double t, double pt, double nt,
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt,
SparseWeightVector v) {
super(n);
Parameters p = new Parameters();
@@ -240,6 +291,7 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt
p.learningRate = r;
p.positiveThickness = pt;
p.negativeThickness = nt;
+ p.featurePruningThreshold = fpt;
setParameters(p);
}
@@ -291,6 +343,7 @@ public void setParameters(Parameters p) {
bias = p.initialWeight;
positiveThickness = p.thickness + p.positiveThickness;
negativeThickness = p.thickness + p.negativeThickness;
+ featurePruningThreshold = p.featurePruningThreshold;
}
/**
@@ -307,6 +360,7 @@ public Learner.Parameters getParameters() {
p.threshold = threshold;
p.positiveThickness = positiveThickness;
p.negativeThickness = negativeThickness;
+ p.featurePruningThreshold = featurePruningThreshold;
return p;
}
@@ -531,6 +585,15 @@ public void initialize(int numExamples, int numFeatures) {
}
+ /**
+ * When training is complete, optimize the feature set by discarding low value
+ * weights.
+ */
+ public void doneTraining() {
+ super.doneTraining();
+ LinearThresholdUnitOptimizer ltuo = new LinearThresholdUnitOptimizer(this);
+ ltuo.optimize();
+ }
/**
* An LTU returns two scores; one for the negative classification and one for the positive
* classification. By default, the score for the positive classification is the result of
@@ -751,7 +814,8 @@ public static class Parameters extends Learner.Parameters {
public double positiveThickness;
/** The thickness of the hyperplane on the negative side; default 0. */
public double negativeThickness;
-
+ /** Weights whose magnitude falls below this threshold are pruned. */
+ public double featurePruningThreshold;
/** Sets all the default values. */
public Parameters() {
@@ -760,6 +824,7 @@ public Parameters() {
initialWeight = defaultInitialWeight;
threshold = defaultThreshold;
thickness = defaultThickness;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -781,6 +846,7 @@ public Parameters(Parameters p) {
thickness = p.thickness;
positiveThickness = p.positiveThickness;
negativeThickness = p.negativeThickness;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -801,7 +867,6 @@ public void setParameters(Learner l) {
**/
public String nonDefaultString() {
String result = super.nonDefaultString();
-
if (learningRate != LinearThresholdUnit.defaultLearningRate)
result += ", learningRate = " + learningRate;
if (initialWeight != LinearThresholdUnit.defaultInitialWeight)
@@ -814,10 +879,22 @@ public String nonDefaultString() {
result += ", positiveThickness = " + positiveThickness;
if (negativeThickness != 0)
result += ", negativeThickness = " + negativeThickness;
-
+ if (featurePruningThreshold != LinearThresholdUnit.defaultFeaturePruningThreshold)
+ result += ", featurePruningThreshold = " + featurePruningThreshold;
if (result.startsWith(", "))
result = result.substring(2);
return result;
}
}
+
+
+ /**
+ * Given the index of the weights to prune, discard them, then shrink the weight vector down
+ * to save memory.
+ * @param uselessfeatures the features being pruned.
+ * @param numberFeatures the total number of features before pruning.
+ */
+ public void pruneWeights(int[] uselessfeatures, int numberFeatures) {
+ this.getWeightVector().pruneWeights(uselessfeatures, numberFeatures);
+ }
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java
new file mode 100644
index 00000000..e25c5b5d
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java
@@ -0,0 +1,447 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn;
+
+import java.io.PrintStream;
+import java.util.Arrays;
+import java.util.Random;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
+import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature;
+import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer;
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.SimpleNNTrainer;
+
+/**
+ * This class will leverage the Neural Net implementation {@link edu.illinois.cs.cogcomp.lbjava.neuralnet.NeuralNetwork}
+ * to allow creation and use of a backprop neural net implementation including momentum, bias, and back propagation
+ * for learning. There is a threaded learner that works quite well ONLY where there are a large number of weights
+ * between layers.
+ *
+ * This class is really just a wrapper for a SimpleNNTrainer which does all the work of training.
+ * @author redman
+ */
+public class NeuralNetLearner extends Learner {
+
+ /** computed */
+ private static final long serialVersionUID = -3369861028861092661L;
+
+ /** the parameters for learning and stuff. */
+ private Parameters parameters = new Parameters();
+
+ /** This is the object that will train the neural net. It uses its own
+ * internal mechanism and data representation for efficiency. */
+ private SimpleNNTrainer trainer = null;
+
+ /**
+ * our props include not only number of rounds (epochs), also a learning rate and momentum.
+ * @author redman
+ */
+ public static class Parameters extends Learner.Parameters {
+ /** default */
+ private static final long serialVersionUID = 1L;
+
+ /** the learning rate. */
+ public float learningRate = 0.5f;
+
+ /** the momentum value. */
+ public float momentum = 0.5f;
+
+ /** the random number generator seed. */
+ public int seed = -1;
+
+ /** the number of inputs */
+ public int inputCount = 0;
+
+ /** the number of outputs */
+ public int outputCount = 1;
+
+ /** the number of outputs from the single hidden layer */
+ public int hiddenCount = 100;
+
+ /** the layers of the neural network. */
+ private Layer[] layers;
+
+ /**
+ * Copy properties from the provided properties.
+ * @param p the props to copy.
+ */
+ public Parameters(Parameters p) {
+ this.learningRate = p.learningRate;
+ this.momentum = p.momentum;
+ this.seed = p.seed;
+ this.inputCount = p.inputCount;
+ this.outputCount = p.outputCount;
+ this.hiddenCount = p.hiddenCount;
+ }
+ /**
+ * Default constructor; initializes all parameters to their
+ * default values.
+ */
+ public Parameters() {
+ this.learningRate = 0.5f;
+ this.momentum = 0.5f;
+ this.seed = -1;
+ this.inputCount = 0;
+ this.hiddenCount = 100;
+ this.outputCount = 1;
+ }
+
+ }
+
+ /** used to store inputs so we don't realloc these arrays over and over. This is an optimization
+ * only possible because we know this class is not multithreaded. */
+ private float inputs[] = null;
+
+ /** used to store outputs so we don't realloc these arrays over and over. This is an optimization
+ * only possible because we know this class is not multithreaded. */
+ private float outputs[] = null;
+
+ /** number of neurons in each layer, including input and output layers.*/
+ private int[] layerSizes = null;
+
+ /**
+ * Default constructor. Creates a learner named "Howdy" and allocates a
+ * three-entry layer-size array (input, hidden, output); the actual sizes
+ * are filled in later from the parameters (see forget()).
+ * NOTE(review): the previous javadoc described a layerSizes argument that
+ * this constructor does not take; it appears to have been copied from the
+ * array-based constructor below.
+ */
+ public NeuralNetLearner () {
+ super("Howdy");
+ this.layerSizes = new int[3];
+ }
+
+ /**
+ * given arguments for initialization parameters.
+ * @param p the parameters.
+ */
+ public NeuralNetLearner(Parameters p) {
+ super("Howdy");
+ this.parameters = p;
+ }
+
+ /**
+ * The learning rate takes the default value.
+ * @param n The name of the classifier.
+ */
+ public NeuralNetLearner(String n) {
+ super(n);
+ }
+
+ /**
+ * Init the neural network learner by providing array with number of neurons in each layer, including
+ * the input layer. The caller will need to determine the number of inputs, the number of outputs and the number
+ * of hidden layers, and the neurons in that layer. The first index in the layerSizes indicates the number of inputs,
+ * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number
+ * of neurons.
+ * @param layerSizes the number of neurons in each layer.
+ */
+ public NeuralNetLearner (int[] layerSizes, Parameters p, boolean training) {
+ super("Howdy");
+ parameters = p;
+ parameters.layers = new Layer[layerSizes.length-1];
+ this.layerSizes = layerSizes;
+ this.forget();
+ }
+
+ /**
+ * Resets the learned model by rebuilding the network layers from the configured layer sizes.
+ */
+ public void forget() {
+ super.forget();
+ if (this.getInputCount() != -1) {
+ this.layerSizes = new int[3];
+ this.layerSizes[0] = this.getInputCount();
+ this.layerSizes[1] = this.getHiddenCount();
+ this.layerSizes[2] = this.getOutputCount();
+ parameters.layers = new Layer[layerSizes.length-1];
+ Layer[] l = this.parameters.layers;
+ Random r = new Random (1234);
+ for (int i = 0; i < layerSizes.length-1; i++) {
+ l[i] = new Layer(layerSizes[i], layerSizes[i+1], r);
+ }
+ inputs = new float[l[0].getNumberInputs()];
+ outputs = new float[l[l.length-1].getNumberOutputs()];
+ trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum);
+ }
+ }
+
+ /**
+ * Returns a string describing the output feature type of this classifier.
+ * @return "real"
+ **/
+ public String getOutputType() {
+ return "real";
+ }
+ /**
+ * Writes the learned function's internal representation in binary form.
+ * @param out The output stream.
+
+ public void write(ExceptionlessOutputStream out) {
+ super.write(out);
+ out.writeFloat(this.parameters.learningRate);
+ out.writeFloat(this.parameters.momentum);
+ out.writeInt(this.parameters.rounds);
+ if (this.layerSizes == null)
+ out.writeInt(0);
+ else {
+ out.writeInt(this.layerSizes.length);
+ for (int neurons : this.layerSizes)
+ out.writeInt(neurons);
+ for (Layer l : this.parameters.layers) {
+ l.write(out);
+ }
+ }
+ }
+
+ /**
+ * Reads the binary representation of a learner with this object's run-time type, overwriting
+ * any and all learned or manually specified parameters as well as the label lexicon but without
+ * modifying the feature lexicon.
+ * @param in The input stream. NOTE(review): this comment block is never closed, so the read() method below is effectively commented out -- confirm this is intentional.
+
+ public void read(ExceptionlessInputStream in) {
+ super.read(in);
+ this.parameters.learningRate = in.readFloat();
+ this.parameters.momentum = in.readFloat();
+ this.parameters.rounds = in.readInt();
+ int layers = in.readInt();
+ if (layers != 0) {
+ int[] szs = new int[layers];
+ for (int i = 0 ; i < szs.length; i++)
+ szs[i] = in.readInt();
+ this.layerSizes = szs;
+ Random r = new Random (1234);
+ for (int i = 0; i < layerSizes.length-1; i++) {
+ this.parameters.layers[i] = new Layer(layerSizes[i], layerSizes[i+1], r);
+ }
+ trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum);
+ for (Layer l : this.parameters.layers) {
+ l.read(in);
+ }
+ }
+ }
+
+
+ /**
+ * Populate the input and output vectors with the values for only those
+ * features that are represented.
+ */
+ final private void populateNNVector(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
+ double[] labelValues) {
+ Arrays.fill(inputs,0.0f);
+ Arrays.fill(outputs,0.0f);
+ for (int i = 0; i < exampleFeatures.length; i++)
+ inputs[exampleFeatures[i]] = (float)exampleValues[i];
+ if (exampleLabels != null)
+ for (int i = 0; i < exampleLabels.length; i++)
+ outputs[exampleLabels[i]] = (float)labelValues[i];
+
+ }
+
+ /**
+ * Trains the learning algorithm given an object as an example.
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @param exampleLabels The example's label(s).
+ * @param labelValues The labels' values.
+ **/
+ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
+ double[] labelValues) {
+ this.populateNNVector(exampleFeatures, exampleValues, exampleLabels, labelValues);
+ this.trainer.train(inputs, outputs);
+ }
+
+ /**
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return null
+ **/
+ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
+ return null;
+ }
+
+ /**
+ * Returns the classification of the given example as a single feature instead of a
+ * {@link FeatureVector}.
+ * @param f The features array.
+ * @param v The values array.
+ * @return The classification of the example as a feature.
+ **/
+ public Feature featureValue(int[] f, double[] v) {
+ this.populateNNVector(f, v, null, null);
+
+ // this returns the activation energies for ALL layers, we only want the output layer
+ float[][] results = this.trainer.activate(inputs);
+
+ // the last vector contains the score, this is the output of the last layer.
+ return new RealPrimitiveStringFeature(containingPackage, name, "", results [results.length-1][0]);
+ }
+
+ /**
+ * Simply computes the dot product of the weight vector and the example
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return The computed real value.
+ **/
+ public double realValue(int[] exampleFeatures, double[] exampleValues) {
+ this.populateNNVector(exampleFeatures, exampleValues, null, null);
+ return (double) this.trainer.activate(inputs)[0][0];
+ }
+
+ /**
+ * Simply computes the dot product of the weight vector and the feature vector extracted from
+ * the example object.
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return The computed feature (in a vector).
+ **/
+ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
+ return new FeatureVector(featureValue(exampleFeatures, exampleValues));
+ }
+
+ /**
+ * Writes the algorithm's internal representation as text. In the first line of output, the name
+ * of the classifier is printed, followed by the learning rate, momentum, and number of rounds.
+ * @param out The output stream.
+ */
+ public void write(PrintStream out) {
+ out.println(name + ": " + this.parameters.learningRate + ", " + this.parameters.momentum + ", " + this.parameters.rounds);
+ for (Layer l : this.parameters.layers) {
+ l.write(out);
+ }
+ }
+
+ /**
+ * Returns a deep clone of this learning algorithm.
+ * TODO
+ */
+ public Object clone() {
+ NeuralNetLearner clone = null;
+ try {
+ clone = (NeuralNetLearner) super.clone();
+ } catch (Exception e) {
+ System.err.println("Error cloning NeuralNetLearner: " + e);
+ System.exit(1);
+ }
+ return clone;
+ }
+
+ /**
+ * @return the seed to seed all random number gen.
+ */
+ public int getSeed() {
+ return this.parameters.seed;
+ }
+
+ /**
+ * @param seed the seed to set
+ */
+ public void setSeed(int seed) {
+ this.parameters.seed = seed;
+ }
+
+ /**
+ * @return the number of total inputs
+ */
+ public int getInputCount() {
+ return this.parameters.inputCount;
+ }
+
+ /**
+ * @param inputCount the inputCount to set
+ */
+ public void setInputCount(int inputCount) {
+ this.parameters.inputCount = inputCount;
+ }
+
+ /**
+ * @return the outputCount
+ */
+ public int getOutputCount() {
+ return this.parameters.outputCount;
+ }
+
+ /**
+ * @param outputCount the outputCount to set
+ */
+ public void setOutputCount(int outputCount) {
+ this.parameters.outputCount = outputCount;
+ }
+
+ /**
+ * @return the hiddenCount
+ */
+ public int getHiddenCount() {
+ return this.parameters.hiddenCount;
+ }
+
+ /**
+ * @param hiddenCount the hiddenCount to set
+ */
+ public void setHiddenCount(int hiddenCount) {
+ this.parameters.hiddenCount = hiddenCount;
+ }
+
+ /**
+ * @return the learning rate used to throttle the rate at which the weight parameters change.
+ */
+ public float getLearningRate() {
+ return parameters.learningRate;
+ }
+
+ /**
+ * set the learning rate at which the weight parameters change.
+ * @param learningRate the learning rate at which the weight parameters change.
+ */
+ public void setLearningRate(float learningRate) {
+ this.parameters.learningRate = learningRate;
+ }
+
+ public float getMomentum() {
+ return parameters.momentum;
+ }
+
+ /**
+ * set the value used to prevent convergence against local minimum.
+ * @param momentum used to prevent convergence against local minimum.
+ */
+ public void setMomentum(float momentum) {
+ this.parameters.momentum = momentum;
+ }
+
+ /**
+ * Get the number of epochs.
+ * @return number of epochs to train.
+ */
+ public int getEpochs() {
+ return parameters.rounds;
+ }
+
+ /**
+ * set the number of training iterations. More should yield better results, until overfit.
+ * @param epochs the number of training iterations.
+ */
+ public void setEpochs(int epochs) {
+ this.parameters.rounds = epochs;
+ }
+
+ /**
+ * Retrieves the parameters that are set in this learner.
+ * @return An object containing all the values of the parameters that control the behavior of
+ * this learning algorithm.
+ **/
+ public Learner.Parameters getParameters() {
+ return parameters;
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
index c9b74899..4e76223a 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
@@ -61,6 +61,15 @@ public class SparseAveragedPerceptron extends SparsePerceptron {
* {@link SparseAveragedPerceptron.AveragedWeightVector}.
**/
protected AveragedWeightVector awv;
+
+ /**
+ * @return the averaged weight vector
+ */
+ public AveragedWeightVector getAveragedWeightVector() {
+ return awv;
+ }
+
+
/** Keeps the extra information necessary to compute the averaged bias. */
protected double averagedBias;
@@ -729,5 +738,46 @@ public Object clone() {
public SparseWeightVector emptyClone() {
return new AveragedWeightVector();
}
+
+ /**
+ * If we prune worthless weights, we must also prune useless averages.
+ * @param uselessfeatures useless features.
+ * @param numfeatures since this weight vec does not know how many features there are, it must be passed in
+ */
+ public void pruneWeights(int[] uselessfeatures, int numfeatures) {
+ if (uselessfeatures.length == 0)
+ return;
+ super.pruneWeights(uselessfeatures, numfeatures);
+
+ // create a new smaller weight vector for the pruned weights.
+ int oldsize = this.averagedWeights.size();
+ if (oldsize > numfeatures) {
+ throw new RuntimeException("There was an averaged weight vector with more weights("+oldsize+
+ ") than the number of features("+numfeatures+")!");
+ }
+ int newsize = numfeatures - uselessfeatures.length;
+ double [] newvec = new double[newsize];
+
+ // copy the weights from the old vector.
+ int uselessindex = 0;
+ int newvecindex = 0;
+ for (int oldindex = 0; oldindex < oldsize; oldindex++) {
+ if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) {
+ // this is a useless feature, we will skip it.
+ uselessindex++;
+ } else {
+ newvec[newvecindex] = averagedWeights.get(oldindex);
+ newvecindex++;
+ }
+ }
+
+ // compress the array.
+ if (newvecindex != newsize) {
+ double[] tmp = new double[newvecindex];
+ System.arraycopy(newvec, 0, tmp, 0, newvecindex);;
+ newvec = tmp;
+ }
+ this.averagedWeights = new DVector(newvec);
+ }
}
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
index b63d0b0f..822fc1fd 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
@@ -10,6 +10,7 @@
import java.io.PrintStream;
import java.util.Collection;
import java.util.Iterator;
+import java.util.Map.Entry;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream;
@@ -19,6 +20,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SparseNetworkOptimizer;
/**
* A SparseNetworkLearner uses multiple {@link LinearThresholdUnit}s to make a
@@ -44,7 +46,6 @@
public class SparseNetworkLearner extends Learner {
private static final long serialVersionUID = 1L;
-
/** Default for {@link #baseLTU}. */
public static final LinearThresholdUnit defaultBaseLTU = new SparseAveragedPerceptron();
@@ -70,7 +71,6 @@ public class SparseNetworkLearner extends Learner {
/** Whether or not this learner's labeler produces conjunctive features. */
protected boolean conjunctiveLabels;
-
/**
* Instantiates this multi-class learner with the default learning algorithm:
* {@link #defaultBaseLTU}.
@@ -185,7 +185,6 @@ public void setParameters(Parameters p) {
setLTU(p.baseLTU);
}
-
/**
* Retrieves the parameters that are set in this learner.
*
@@ -198,7 +197,6 @@ public Learner.Parameters getParameters() {
return p;
}
-
/**
* Sets the baseLTU variable. This method will not have any effect on the
* LTUs that already exist in the network. However, new LTUs created after this method is
@@ -211,7 +209,6 @@ public void setLTU(LinearThresholdUnit ltu) {
baseLTU.name = name + "$baseLTU";
}
-
/**
* Sets the labeler.
*
@@ -229,7 +226,6 @@ public void setLabeler(Classifier l) {
super.setLabeler(l);
}
-
/**
* Sets the extractor.
*
@@ -255,7 +251,6 @@ public void setNetworkLabel(int label) {
network.set(label, ltu);
}
-
/**
* Each example is treated as a positive example for the linear threshold unit associated with
* the label's value that is active for the example and as a negative example for all other
@@ -290,7 +285,17 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa
ltu.learn(exampleFeatures, exampleValues, l, labelValues);
}
}
-
+
+ /**
+ * When we complete learning, we will do an optimization.
+ */
+ public void doneTraining() {
+ super.doneTraining();
+
+ // do the optimization
+ SparseNetworkOptimizer optimizer = new SparseNetworkOptimizer(this);
+ optimizer.optimize();
+ }
/** Simply calls doneLearning() on every LTU in the network. */
public void doneLearning() {
@@ -304,14 +309,12 @@ public void doneLearning() {
}
}
-
/** Sets the number of examples and features. */
public void initialize(int ne, int nf) {
numExamples = ne;
numFeatures = nf;
}
-
/**
* Simply calls {@link LinearThresholdUnit#doneWithRound()} on every LTU in the network.
*/
@@ -326,14 +329,12 @@ public void doneWithRound() {
}
}
-
/** Clears the network. */
public void forget() {
super.forget();
network = new OVector();
}
-
/**
* Returns scores for only those labels in the given collection. If the given collection is
* empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit}
@@ -352,7 +353,6 @@ public ScoreSet scores(Object example, Collection candidates) {
return scores((int[]) exampleArray[0], (double[]) exampleArray[1], candidates);
}
-
/**
* Returns scores for only those labels in the given collection. If the given collection is
* empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit}
@@ -404,7 +404,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, Collection
return result;
}
-
/**
* This method is a surrogate for {@link #scores(int[],double[],Collection)} when the labeler is
* known to produce conjunctive features. It is necessary because when given a string label from
@@ -438,7 +437,6 @@ protected ScoreSet conjunctiveScores(int[] exampleFeatures, double[] exampleValu
return result;
}
-
/**
* Produces a set of scores indicating the degree to which each possible discrete classification
* value is associated with the given example object. These scores are just the scores of each
@@ -465,7 +463,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
return result;
}
-
/**
* Returns the classification of the given example as a single feature instead of a
* {@link FeatureVector}.
@@ -494,7 +491,6 @@ public Feature featureValue(int[] f, double[] v) {
return bestValue == -1 ? null : predictions.get(bestValue);
}
-
/**
* This implementation uses a winner-take-all comparison of the outputs from the individual
* linear threshold units' score methods.
@@ -507,7 +503,6 @@ public String discreteValue(int[] exampleFeatures, double[] exampleValues) {
return featureValue(exampleFeatures, exampleValues).getStringValue();
}
-
/**
* This implementation uses a winner-take-all comparison of the outputs from the individual
* linear threshold units' score methods.
@@ -520,7 +515,6 @@ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
return new FeatureVector(featureValue(exampleFeatures, exampleValues));
}
-
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -535,7 +529,6 @@ public Feature valueOf(Object example, Collection candidates) {
return valueOf((int[]) exampleArray[0], (double[]) exampleArray[1], candidates);
}
-
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -596,7 +589,6 @@ public Feature valueOf(int[] exampleFeatures, double[] exampleValues, Collection
return predictions.get(bestValue);
}
-
/**
* This method is a surrogate for {@link #valueOf(int[],double[],Collection)} when the labeler
* is known to produce conjunctive features. It is necessary because when given a string label
@@ -634,7 +626,6 @@ protected Feature conjunctiveValueOf(int[] exampleFeatures, double[] exampleValu
return predictions.get(bestValue);
}
-
/**
* Writes the algorithm's internal representation as text.
*
@@ -659,7 +650,6 @@ public void write(PrintStream out) {
out.close();
}
-
/**
* Writes the learned function's internal representation in binary form.
*
@@ -682,7 +672,6 @@ public void write(ExceptionlessOutputStream out) {
out.close();
}
-
/**
* Reads the binary representation of a learner with this object's run-time type, overwriting
* any and all learned or manually specified parameters as well as the label lexicon but without
@@ -700,7 +689,6 @@ public void read(ExceptionlessInputStream in) {
network.add(Learner.readLearner(in));
}
-
/** Returns a deep clone of this learning algorithm. */
public Object clone() {
SparseNetworkLearner clone = null;
@@ -727,7 +715,6 @@ public Object clone() {
return clone;
}
-
/**
* Simply a container for all of {@link SparseNetworkLearner}'s configurable parameters. Using
* instances of this class should make code more readable and constructors less complicated.
@@ -743,13 +730,11 @@ public static class Parameters extends Learner.Parameters {
**/
public LinearThresholdUnit baseLTU;
-
/** Sets all the default values. */
public Parameters() {
baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone();
}
-
/**
* Sets the parameters from the parent's parameters object, giving defaults to all
* parameters declared in this object.
@@ -759,14 +744,12 @@ public Parameters(Learner.Parameters p) {
baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone();
}
-
/** Copy constructor. */
public Parameters(Parameters p) {
super(p);
baseLTU = p.baseLTU;
}
-
/**
* Calls the appropriate Learner.setParameters(Parameters) method for this
* Parameters object.
@@ -777,7 +760,6 @@ public void setParameters(Learner l) {
((SparseNetworkLearner) l).setParameters(this);
}
-
/**
* Creates a string representation of these parameters in which only those parameters that
* differ from their default values are mentioned.
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
index defe1001..0353daba 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
@@ -29,7 +29,9 @@
* @author Nick Rizzolo
**/
public class SparseWeightVector implements Cloneable, Serializable {
- /**
+ /** default. */
+ private static final long serialVersionUID = 1L;
+ /**
* When a feature appears in an example but not in this vector, it is assumed to have this
* weight.
**/
@@ -40,7 +42,7 @@ public class SparseWeightVector implements Cloneable, Serializable {
/** The weights in the vector indexed by their {@link Lexicon} key. */
protected DVector weights;
- /** Simply instantiates {@link #weights}. */
+ /** Simply instantiates {@link #weights}. */
public SparseWeightVector() {
this(new DVector(defaultCapacity));
}
@@ -106,6 +108,13 @@ public void setWeight(int featureIndex, double w, double defaultW) {
weights.set(featureIndex, w, defaultW);
}
+ /**
+ * For those cases where we need the raw weights (during model optimization).
+ * @return the raw, unmodified weights.
+ */
+ public DVector getRawWeights() {
+ return weights;
+ }
/**
* Takes the dot product of this SparseWeightVector with the argument vector, using
@@ -317,7 +326,8 @@ public void toStringJustWeights(PrintStream out) {
* @param min Sets the minimum width for the textual representation of all features.
* @param lex The feature lexicon.
**/
- public void toStringJustWeights(PrintStream out, int min, Lexicon lex) {
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ public void toStringJustWeights(PrintStream out, int min, Lexicon lex) {
Map map = lex.getMap();
Map.Entry[] entries = (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]);
Arrays.sort(entries, new Comparator() {
@@ -420,7 +430,8 @@ public static SparseWeightVector readWeightVector(ExceptionlessInputStream in) {
String name = in.readString();
if (name == null)
return null;
- Class c = ClassUtils.getClass(name);
+ @SuppressWarnings("rawtypes")
+ Class c = ClassUtils.getClass(name);
SparseWeightVector result = null;
try {
@@ -482,4 +493,44 @@ public Object clone() {
public SparseWeightVector emptyClone() {
return new SparseWeightVector();
}
+
+ /**
+ * delete all irrelevant feature weights.
+ * @param uselessfeatures useless features.
+ * @param numfeatures since this weight vec does not know how many features there are, it must be passed in
+ */
+ public void pruneWeights(int[] uselessfeatures, int numfeatures) {
+ if (uselessfeatures.length == 0)
+ return;
+
+ // create a new smaller weight vector for the pruned weights.
+ int oldsize = weights.size();
+ if (oldsize > numfeatures) {
+ throw new RuntimeException("There was a weight vector with more weights("+oldsize+
+ ") than the number of features("+numfeatures+")!");
+ }
+ int newsize = numfeatures - uselessfeatures.length;
+ double [] newvec = new double[newsize];
+
+ // copy the weights from the old vector.
+ int uselessindex = 0;
+ int newvecindex = 0;
+ for (int oldindex = 0; oldindex < oldsize; oldindex++) {
+ if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) {
+ // this is a useless feature, we will skip it.
+ uselessindex++;
+ } else {
+ newvec[newvecindex] = weights.get(oldindex);
+ newvecindex++;
+ }
+ }
+
+ // compress the array.
+ if (newvecindex != newsize) {
+ double[] tmp = new double[newvecindex];
+ System.arraycopy(newvec, 0, tmp, 0, newvecindex);;
+ newvec = tmp;
+ }
+ this.weights = new DVector(newvec);
+ }
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
index aec40db5..2c930b57 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
@@ -24,6 +24,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SupportVectorMachineOptimizer;
import edu.illinois.cs.cogcomp.lbjava.util.ByteString;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -64,6 +65,10 @@
* @author Michael Paul
**/
public class SupportVectorMachine extends Learner {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
/** Default for {@link #solverType}. */
public static final String defaultSolverType = "L2LOSS_SVM";
/** Default for {@link #C}. */
@@ -72,6 +77,11 @@ public class SupportVectorMachine extends Learner {
public static final double defaultEpsilon = 0.1;
/** Default for {@link #bias}. */
public static final double defaultBias = 1.0;
+ /** any weight less than this is considered irrelevant. This is for pruning. */
+ public static final double defaultFeaturePruningThreshold = 0.000001;
+
+ /** feature pruning threshold caps magnitude of useful features. */
+ public double featurePruningThreshold;
/**
* Keeps track of whether the doneLearning() warning message has been printed.
@@ -114,6 +124,14 @@ public class SupportVectorMachine extends Learner {
/** The number of bias features; there are either 0 or 1 of them. */
protected int biasFeatures;
+ /**
+ * @return the biasFeatures
+ */
+ public int getBiasFeatures() {
+ return biasFeatures;
+ }
+
+
/** Controls if liblinear-related messages are output */
protected boolean displayLL = false;
@@ -210,6 +228,20 @@ public SupportVectorMachine(double c, double e, double b, String s, boolean d) {
this("", c, e, b, s, d);
}
+ /**
+ * Initializing constructor. The name of the classifier gets the empty string.
+ *
+ * @param c The desired C value.
+ * @param e The desired epsilon value.
+ * @param b The desired bias.
+ * @param s The solver type.
+ * @param d Toggles if the liblinear-related output should be displayed.
+ * @param fpt the feature pruning threshold.
+ **/
+ public SupportVectorMachine(double c, double e, double b, String s, boolean d, double fpt) {
+ this("", c, e, b, s, d, fpt);
+ }
+
/**
* Initializing constructor. C, epsilon, the bias, and the solver type take the default values.
*
@@ -276,6 +308,20 @@ public SupportVectorMachine(String n, double c, double e, double b, String s) {
* @param d Toggles if the liblinear-related output should be displayed.
**/
public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d) {
+ this(n, c, e, b, s, d, SupportVectorMachine.defaultFeaturePruningThreshold);
+ }
+
+ /**
+ * Initializing constructor.
+ *
+ * @param n The name of the classifier.
+ * @param c The desired C value.
+ * @param e The desired epsilon value.
+ * @param b The desired bias.
+ * @param s The solver type.
+ * @param d Toggles if the liblinear-related output should be displayed.
+ **/
+ public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d, double fpt) {
super(n);
newLabelLexicon = labelLexicon;
Parameters p = new Parameters();
@@ -284,10 +330,12 @@ public SupportVectorMachine(String n, double c, double e, double b, String s, bo
p.bias = b;
p.solverType = s;
p.displayLL = d;
+ p.featurePruningThreshold = fpt;
allowableValues = new String[0];
setParameters(p);
}
+
/**
* Initializing constructor. Sets all member variables to their associated settings in the
* {@link SupportVectorMachine.Parameters} object. The name of the classifier gets the empty
@@ -317,9 +365,23 @@ public double[] getWeights() {
return weights;
}
+ /**
+ * @return the numFeatures
+ */
+ public int getNumFeatures() {
+ return numFeatures;
+ }
+
public int getNumClasses() {
return numClasses;
}
+
+ /**
+ * @return the solverType
+ */
+ public String getSolverType() {
+ return solverType;
+ }
/**
* Sets the values of parameters that control the behavior of this learning algorithm.
@@ -333,6 +395,7 @@ public void setParameters(Parameters p) {
biasFeatures = (bias >= 0) ? 1 : 0;
solverType = p.solverType;
displayLL = p.displayLL;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -349,6 +412,7 @@ public Learner.Parameters getParameters() {
p.bias = bias;
p.solverType = solverType;
p.displayLL = displayLL;
+ p.featurePruningThreshold = this.featurePruningThreshold;
return p;
}
@@ -403,6 +467,7 @@ public void initialize(int ne, int nf) {
* @param exampleLabels The example's array of label indices.
* @param labelValues The example's array of label values.
**/
+ @SuppressWarnings({ "unchecked", "rawtypes" })
public void learn(final int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
double[] labelValues) {
// Expand the size of the example arrays if they are full.
@@ -600,12 +665,22 @@ else if (newLabelLexicon.size() > labelLexicon.size()) {
weights = trainedModel.getFeatureWeights();
allExamples = null;
allLabels = null;
-
if (displayLL)
System.out.println(" Finished training at " + new Date());
}
+ /**
+ * Optimize the model by doing feature pruning, drop the low value weights.
+ */
+ public void doneTraining() {
+ super.doneTraining();
+
+ // optimize the resulting model by discarding low weight features.
+ SupportVectorMachineOptimizer svmo = new SupportVectorMachineOptimizer(this);
+ svmo.optimize();
+ }
+
/**
* Writes the algorithm's internal representation as text. In the first line of output, the name
* of the classifier is printed, followed by {@link #C}, {@link #epsilon}, {@link #bias}, and
@@ -895,7 +970,7 @@ public double score(int[] exampleFeatures, double[] exampleValues, int label) {
numClasses = 1;
label = 0;
}
-
+
for (int i = 0; i < exampleFeatures.length; i++) {
int f = exampleFeatures[i];
@@ -929,6 +1004,43 @@ public Feature valueOf(Object example, Collection candidates) {
}
+ /**
+ * Given the index of the weights to prune, discard them, then shrink the weight vector down
+ * to save memory.
+ * @param uselessfeatures the indices of the features being pruned.
+ * @param numberFeatures the total number of features before pruning.
+ */
+ public void pruneWeights(int[] uselessfeatures, int numberFeatures) {
+ int sz = numberFeatures - uselessfeatures.length;
+ double[] newweights = new double[sz+biasFeatures];
+ int nextToPrune = 0;
+ int newweightindex = 0;
+ for (int i = 0; i < weights.length; i++) {
+ if (nextToPrune < uselessfeatures.length && i == uselessfeatures[nextToPrune]) {
+ if (Math.abs(weights[i]) > this.featurePruningThreshold)
+ throw new IllegalArgumentException("Pruning a high value weight : "+weights[i]+" at "+i);
+ nextToPrune++;
+ } else {
+ if (newweightindex >= newweights.length)
+ throw new IllegalArgumentException("Attempted to overpopulate the new weight : indx="
+ +i+" features="+numberFeatures+" useless="+uselessfeatures.length);
+ newweights[newweightindex] = weights[i];
+ newweightindex++;
+ }
+ }
+
+ // do some sanity checks.
+ if (newweightindex != newweights.length)
+ throw new IllegalArgumentException("The new pruned weight vector was not fully populated!");
+ if (nextToPrune != uselessfeatures.length)
+ throw new IllegalArgumentException("Not all the prunable features were pruned!");
+
+ // all good, do the replacement.
+ System.out.println("SVM.pruneWeights: "+sz+" features, "+newweights.length+" weights size");
+ numFeatures = sz;
+ weights = newweights;
+ }
+
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -1062,6 +1174,14 @@ public static class Parameters extends Learner.Parameters {
*
**/
public String solverType;
+
+ /**
+ * @return the solverType
+ */
+ public String getSolverType() {
+ return solverType;
+ }
+
/**
* The cost parameter C; default {@link SupportVectorMachine#defaultC}
**/
@@ -1081,7 +1201,9 @@ public static class Parameters extends Learner.Parameters {
* false
**/
public boolean displayLL;
-
+
+ /** feature pruning threshold caps magnitude of useful features. */
+ public double featurePruningThreshold;
/** Sets all the default values. */
public Parameters() {
@@ -1090,6 +1212,7 @@ public Parameters() {
epsilon = defaultEpsilon;
bias = defaultBias;
displayLL = false;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -1104,6 +1227,7 @@ public Parameters(Learner.Parameters p) {
epsilon = defaultEpsilon;
bias = defaultBias;
displayLL = false;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -1115,6 +1239,7 @@ public Parameters(Parameters p) {
epsilon = p.epsilon;
bias = p.bias;
displayLL = p.displayLL;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -1168,6 +1293,8 @@ public String nonDefaultString() {
result += ", epsilon = " + epsilon;
if (bias != SupportVectorMachine.defaultBias)
result += ", bias = " + bias;
+ if (featurePruningThreshold != defaultFeaturePruningThreshold)
+ result += ", feature pruning threshold = " + featurePruningThreshold;
if (result.startsWith(", "))
result = result.substring(2);
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java
new file mode 100644
index 00000000..f6a68d15
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java
@@ -0,0 +1,181 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature;
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+
+/**
+ * This class defines the life cycle methods for pruning useless features
+ * from a lexicon. Features for example that carry zero weights with them are
+ * not useful to the model, so can be eliminated saving space and execution time, without
+ * affecting accuracy (much).
+ * @author redman
+ */
+abstract public class LexiconOptimizer {
+
+ /** any weight less than this is considered irrelevant. This is for pruning. */
+ private static final double PRUNING_THRESHOLD = 0.000001;
+
+ /** lexicon contains the features we will operate on. */
+ protected Lexicon lexicon;
+
+ /** this also for testing, save feature names we will delete, check the names when we do. */
+ final protected ArrayList uselessFeatureNames = new ArrayList();
+
+ /** this is the threshold we use to discard useless features. */
+ protected double threshold = PRUNING_THRESHOLD;
+
+ /**
+ * We must have a lexicon to perform this operation.
+ * @param lexicon the lexicon object.
+ * @param threshold the feature pruning threshold.
+ */
+ protected LexiconOptimizer(Lexicon lexicon, double threshold) {
+ this.lexicon = lexicon;
+ this.threshold = threshold;
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+ * @return true if the feature carries any weight of significant magnitude.
+ */
+ abstract protected boolean hasWeight(Lexicon lex, Feature f);
+
+ /**
+ * This method returns the number of features. This implementation assumes the
+ * lexicon is populated, but that's not always the case (with SVM for example appears
+ * to not always have a populated lexicon). In these cases, this method may be overridden.
+ * @return the number of features.
+ */
+ protected int getNumberFeatures() {
+ return lexicon.size();
+ }
+
+ /**
+ * do the optimization
+ */
+ public void optimize () {
+
+ int originalNumFeatures = this.getNumberFeatures();
+ int [] uselessfeatures = identifyUselessFeatures();
+ pruneWeights(uselessfeatures, originalNumFeatures);
+ pruneLexicon(uselessfeatures);
+
+ System.out.println("LexiconOptimizer optimization complete, pruned "
+ +uselessfeatures.length+" features of "+originalNumFeatures+", leaving "+(originalNumFeatures - uselessfeatures.length)+
+ " at threshold of "+threshold);
+ }
+
+ /**
+ * @param f the feature.
+ * @return true if the feature is conjunctive.
+ */
+ static private boolean isConjunctive(Feature f) {
+ return (f instanceof DiscreteConjunctiveFeature || f instanceof RealConjunctiveFeature);
+ }
+
+ /**
+ * If this conjunctive feature has weight, add it and all its children to the white list.
+ * @param lex the lexicon maps feature to index.
+ * @param whitelist the white list we will add to.
+ * @param f the conjunctive feature.
+ */
+ private void traverseConjunctiveTree(HashSet whitelist, Feature f) {
+
+ // add the conjunctive feature.
+ whitelist.add(f);
+
+ if (f instanceof DiscreteConjunctiveFeature) {
+
+ // add its direct children
+ DiscreteConjunctiveFeature dcf = (DiscreteConjunctiveFeature) f;
+ whitelist.add(dcf.getLeft());
+ whitelist.add(dcf.getRight());
+
+ // possibly add any children of children.
+ if (isConjunctive(dcf.getLeft()))
+ traverseConjunctiveTree(whitelist, dcf.getLeft());
+ if (isConjunctive(dcf.getRight()))
+ traverseConjunctiveTree(whitelist, dcf.getRight());
+ } else {
+
+ // add its direct children
+ RealConjunctiveFeature rcf = (RealConjunctiveFeature) f;
+ whitelist.add(rcf.getLeft());
+ whitelist.add(rcf.getRight());
+
+ // possibly add any children of children.
+ if (isConjunctive(rcf.getLeft()))
+ traverseConjunctiveTree(whitelist, rcf.getLeft());
+ if (isConjunctive(rcf.getRight()))
+ traverseConjunctiveTree(whitelist, rcf.getRight());
+ }
+ }
+
+ /**
+ * Find all features we must whitelist. For each conjunctive feature that has weight, we must keep
+ * all its children, regardless of weight, and the rest of the tree from there on down.
+ * @param lex the lexicon.
+ * @return the conjunctive features.
+ */
+ protected HashSet compileWhitelist(Lexicon lex) {
+ HashSet whitelist = new HashSet();
+ for (Object e : lex.getMap().entrySet()) {
+ @SuppressWarnings("unchecked")
+ Entry entry = (Entry) e;
+ Feature f = entry.getKey();
+ if (isConjunctive(f) && this.hasWeight(lex, f)) {
+
+ // add this conjunctive feature and all it's kids to the whitelist.
+ traverseConjunctiveTree(whitelist, f);
+ }
+ }
+ return whitelist;
+ }
+
+
+ /**
+ * Given a list of useless features, prune the entries from the lexicon.
+ * @param uselessfeatures
+ */
+ protected void pruneLexicon(int[] uselessfeatures) {
+ lexicon.discardPrunedFeatures(uselessfeatures);
+ for (Feature f : this.uselessFeatureNames) {
+ if (lexicon.contains(f)) {
+ throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier());
+ }
+ }
+ }
+
+ /**
+ * This method selects the features to be pruned. If weights
+ * are needed, they must be passed to the constructor and stored in fields of
+ * the implementing class. In this way, we make no assumptions about the
+ * structure of the weight classes.
+ * @return the indices of the features deemed useless.
+ */
+ abstract protected int[] identifyUselessFeatures();
+
+ /**
+ * Once we have identified the useless entries, we need to optimize the
+ * model components.
+ * @param uselessfeatures the indices of those features with no significant weights.
+ * @param originalNumFeatures the number of features in the original lexicon.
+ */
+ abstract public void pruneWeights(int[] uselessfeatures, int originalNumFeatures);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java
new file mode 100644
index 00000000..44101b33
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java
@@ -0,0 +1,144 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit;
+import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron;
+import gnu.trove.set.hash.TIntHashSet;
+
+/**
+ * This class will optimize any working LinearThresholdUnit subclass by pruning
+ * low value features.
+ * @author redman
+ */
+public class LinearThresholdUnitOptimizer extends LexiconOptimizer {
+
+ /** the LTU learner we want to optimize. */
+ private LinearThresholdUnit ltuLearner;
+
+    /** this is also for testing; save feature names we will delete, check the names when we do. */
+ final ArrayList uselessFeatureNames = new ArrayList();
+
+ /**
+ * Given the LTU learner to optimize.
+     * @param ltu the LTU learner.
+ */
+ public LinearThresholdUnitOptimizer(LinearThresholdUnit ltu) {
+ super(ltu.demandLexicon(), ltu.featurePruningThreshold);
+ ltuLearner = ltu;
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+     * @return true if the feature's summed weight magnitude exceeds the pruning threshold.
+ */
+ protected boolean hasWeight(Lexicon lex, Feature f) {
+ int featureindex = lex.lookup(f);
+
+ // we assume each element of the network is of the same type, if that type is sparse averaged
+ // perceptron, we check both the averaged and current weight
+ double sum;
+ if (this.ltuLearner instanceof SparseAveragedPerceptron) {
+ SparseAveragedPerceptron sap = (SparseAveragedPerceptron) this.ltuLearner;
+ double wt = sap.getWeightVector().getRawWeights().get(featureindex);
+ double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex);
+ sum = Math.abs(wt);
+ sum += Math.abs(avg);
+ } else {
+ double wt = this.ltuLearner.getWeightVector().getRawWeights().get(featureindex);
+ sum = Math.abs(wt);
+ }
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (sum > this.threshold)
+ return true;
+ else
+ return false;
+ }
+
+ /**
+ * In this case, we must check, for each feature, the associated set of weight in each weight
+ * vector, if they are all very small, it is useless. The array returned is sorted ascending.
+ * @return the set of useless features.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures()
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ protected int[] identifyUselessFeatures() {
+ Lexicon lex = this.ltuLearner.demandLexicon();
+ if (lex != null) {
+ HashSet whitelist = compileWhitelist(lex);
+
+ // we have the conjunctive features, if left, right, or the parent itself has a non zero weight,
+            // consider none of the features (parent, left or right) useless; whitelist them.
+ int count = 0;
+ int numberfeatures = lex.size();
+ int[] all = new int[numberfeatures];
+ TIntHashSet defunct = new TIntHashSet();
+ for (Object e : lex.getMap().entrySet()) {
+ Entry entry = (Entry) e;
+ int fi = entry.getValue();
+ if (!whitelist.contains(entry.getKey())) {
+ double wt = Math.abs(this.ltuLearner.getWeightVector().getRawWeights().get(fi));
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (wt < this.threshold) {
+
+ // This is a useless feature
+ all[count] = fi;
+ if (defunct.contains(fi)) {
+ System.err.println("There was a feature discarded twice during feature pruning!");
+ } else {
+ defunct.add(fi);
+ }
+ this.uselessFeatureNames.add(entry.getKey());
+ count++;
+ }
+ }
+ }
+
+ int[] useless = new int[count];
+ System.arraycopy(all, 0, useless, 0, count);
+ Arrays.sort(useless);
+ return useless;
+ } else
+ return new int[0];
+ }
+
+ /**
+ * Check it out when done, make sure it worked.
+ */
+ protected void pruneLexicon(int[] uselessfeatures) {
+ super.pruneLexicon(uselessfeatures);
+ for (Feature f : this.uselessFeatureNames) {
+ if (lexicon.contains(f)) {
+ throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier());
+ }
+ }
+ }
+
+ /**
+     * Now we remove the useless weights from ALL weight vectors. There must be the same number
+ * of entries in each weight vector as there is in the lexicon.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[])
+ */
+ @Override
+ public void pruneWeights(int[] uselessfeatures, int origNumFeatures) {
+ this.ltuLearner.pruneWeights(uselessfeatures, origNumFeatures);
+ }
+}
\ No newline at end of file
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java
new file mode 100644
index 00000000..45cfe812
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java
@@ -0,0 +1,149 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector;
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit;
+import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron;
+import edu.illinois.cs.cogcomp.lbjava.learn.SparseNetworkLearner;
+import gnu.trove.set.hash.TIntHashSet;
+
+/**
+ * This class will optimize the SparseNetworkLearner by discarding all features
+ * associated with no sufficiently high weight values. For the network learner, we
+ * must check the weights across all the binary learners to determine the value
+ * of a particular feature.
+ * @author redman
+ */
+public class SparseNetworkOptimizer extends LexiconOptimizer {
+
+ /** the network learner we want to optimize. */
+ private SparseNetworkLearner networkLearner;
+
+ /**
+ * Given the sparse net learner to optimize.
+ * @param snl the sparse net learner.
+ */
+ public SparseNetworkOptimizer(SparseNetworkLearner snl) {
+ super(snl.demandLexicon(), snl.getBaseLTU().featurePruningThreshold);
+ networkLearner = snl;
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+     * @return true if the feature carries sufficient weight in any of the binary learners.
+ */
+ protected boolean hasWeight(Lexicon lex, Feature f) {
+ OVector net = networkLearner.getNetwork();
+ if (net.size() == 0)
+ return false;
+ int numberclasses = net.size();
+ int i = 0;
+ double sum = 0;
+ int featureindex = lex.lookup(f);
+
+ // we assume each element of the network is of the same type, if that type is sparse averaged
+ // perceptron, we check both the averaged and current weight
+ if (net.get(0) instanceof SparseAveragedPerceptron) {
+ for (; i < numberclasses; ++i) {
+ SparseAveragedPerceptron sap = (SparseAveragedPerceptron) net.get(i);
+ double wt = sap.getWeightVector().getRawWeights().get(featureindex);
+ double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex);
+ sum += Math.abs(wt);
+ sum += Math.abs(avg);
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (sum >= this.threshold)
+ return true;
+ }
+ } else {
+ for (; i < numberclasses; ++i) {
+ LinearThresholdUnit ltu = (LinearThresholdUnit) net.get(i);
+ double wt = ltu.getWeightVector().getRawWeights().get(featureindex);
+ sum += Math.abs(wt);
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (sum >= this.threshold)
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * In this case, we must check, for each feature, the associated set of weight in each weight
+ * vector, if they are all very small, it is useless. The array returned is sorted ascending.
+ * @return the set of useless features.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures()
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ protected int[] identifyUselessFeatures() {
+ Lexicon lex = networkLearner.demandLexicon();
+ if (lex != null) {
+
+ // we have the conjunctive features, if left, right, or the parent itself has a non zero weight,
+            // consider none of the features (parent, left or right) useless; whitelist them.
+ HashSet whitelist = compileWhitelist(lex);
+ int count = 0;
+ int numberfeatures = lex.size();
+ int[] all = new int[numberfeatures];
+ TIntHashSet defunct = new TIntHashSet();
+
+            // For each feature, determine its value. We will iterate over a map with features as key
+ // and the integer index of the feature. If the feature is whitelisted, we keep, otherwise
+ // check for uselessness and if so, add to the list.
+ for (Object e : lex.getMap().entrySet()) {
+ Entry entry = (Entry) e;
+ if (!whitelist.contains(entry.getKey())) {
+ int fi = entry.getValue();
+ if (!hasWeight(lexicon, entry.getKey())) {
+ all[count] = fi;
+ if (defunct.contains(fi)) {
+ System.err.println("There was a feature discarded twice during feature pruning!");
+ } else {
+ defunct.add(fi);
+ }
+
+ this.uselessFeatureNames.add(entry.getKey());
+ count++;
+ }
+ }
+ }
+
+ int[] useless = new int[count];
+ System.arraycopy(all, 0, useless, 0, count);
+ Arrays.sort(useless);
+ return useless;
+ } else
+ return new int[0];
+ }
+
+ /**
+     * Now we remove the useless weights from ALL weight vectors. There must be the same number
+ * of entries in each weight vector as there is in the lexicon.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[])
+ */
+ @Override
+ public void pruneWeights(int[] uselessfeatures, int origNumFeatures) {
+ OVector ltus = networkLearner.getNetwork();
+ for (int i = 0; i < ltus.size(); i++) {
+ LinearThresholdUnit ltu = (LinearThresholdUnit) ltus.get(i);
+ ltu.pruneWeights(uselessfeatures, origNumFeatures);
+ }
+ }
+}
\ No newline at end of file
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java
new file mode 100644
index 00000000..5b1fa976
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java
@@ -0,0 +1,125 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.*;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+import edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine;
+
+/**
+ * Optimized a support vector machine by discarding any sufficiently low weights.
+ * @author redman
+ */
+public class SupportVectorMachineOptimizer extends LexiconOptimizer {
+
+ /** the model we are going to optimize. */
+ private SupportVectorMachine svm = null;
+
+ /** the number of classes, if the numclasses is two, consider it binary and change to one. */
+ public int numberclasses = -1;
+
+ /** the biasfeatures are 0 for no added bias features, or 1 if bias is added. */
+ public int biasfeatures = 0;
+
+ /**
+ * Take lex and model, and optimize the model by pruning the weights. Any zero weights get pruned.
+ * @param lexicon the lexicon with the feature map.
+ * @param s the support vector machine.
+ */
+ public SupportVectorMachineOptimizer(SupportVectorMachine s) {
+ super(s.demandLexicon(), s.featurePruningThreshold);
+ this.svm = s;
+
+        // the numClasses field gets changed in the write method to allow for the binary case
+ // which is actually two classes to behave as one class (binary).
+ if (!s.getSolverType().equals("MCSVM_CS") && s.getNumClasses() <= 2)
+ numberclasses = 1;
+ else
+ numberclasses = s.getNumClasses();
+
+ // we need to figure out if we have a bias feature introduced
+ this.biasfeatures = svm.getBiasFeatures();
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+     * @return true if the feature's summed weight across classes exceeds the pruning threshold.
+ */
+ protected boolean hasWeight(Lexicon lex, Feature f) {
+ int index = lex.lookup(f);
+ return getWeight(index) > this.threshold;
+ }
+
+ /**
+ * Compute the single weight at the index as the sum of all weights for all classes.
+ * @param index the index of the feature
+ * @return the sum of the absolute value of all weights for the feature.
+ */
+ private double getWeight(int index) {
+ double sum = 0;
+ for (int i = 0; i < this.numberclasses; i++) {
+ sum += Math.abs(svm.getWeights()[index]);
+ index += (this.lexicon.size() + biasfeatures);
+ }
+ return sum;
+ }
+
+ /**
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures()
+ */
+ @Override
+ protected int[] identifyUselessFeatures() {
+
+ // compile the whitelist
+ HashSet whitelist = compileWhitelist(lexicon);
+
+ // look at each feature in the lexicon, any with zero weights can be safely discarded.
+ int [] all = new int [this.lexicon.size()];
+ int count = 0;
+ for (Object e : lexicon.getMap().entrySet()) {
+ @SuppressWarnings("unchecked")
+ Entry entry = (Entry) e;
+ if (!whitelist.contains(entry.getKey())) {
+ int fi = entry.getValue();
+ double wt = getWeight(fi);
+ if (wt < this.threshold) {
+ all[count] = fi;
+ count++;
+ }
+ }
+ }
+ int[] useless = new int[count];
+ System.arraycopy(all, 0, useless, 0, count);
+ Arrays.sort(useless);
+ return useless;
+ }
+
+ /**
+ * This method returns the number of features. This implementation assumes the
+ * lexicon is populated, but that's not always the case (with SVM for example appears
+     * to not always have a populated lexicon). In these cases, this method may be overridden.
+     * @return the number of features.
+ */
+ protected int getNumberFeatures() {
+ return this.svm.getNumFeatures();
+ }
+
+ /**
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[], int)
+ */
+ @Override
+ public void pruneWeights(int[] uselessfeatures, int originalNumFeatures) {
+ this.svm.pruneWeights(uselessfeatures, originalNumFeatures);
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java
new file mode 100644
index 00000000..c9b185bc
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java
@@ -0,0 +1,38 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+/**
+ * For sparse learners, it is often the case that the array of features you learn
+ * contains only a subset of useful features. When we leave these features in the lexicon,
+ * we end up with bloated lexicons and weight vectors. This leads to larger than necessary
+ * models.
+ *
+ * This package contains an interface that defines the life cycle for the feature pruning
+ * process, as well as some implementations, one that takes multiple weight vectors (for
+ * multi-class network learners), and some that takes only one weight vector.
+ *
+ * All optimizers should subclass @see LexiconOptimizer which implements most of the
+ * optimization. Subclass will need to provide methods to compute the weight value to compare
+ * against the threshold, a method to identify the useless features, and a method to prune
+ * those features.
+ *
+ * The optimizers are invoked by the {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#doneTraining}
+ * method of the Learner class when all learning is complete. For those learners that include a feature
+ * pruning implementation, they must override this method to invoke the optimizer. In this way, during the
+ * normal LBJava compile and model build cycle, the optimization is performed automatically. For those
+ * who have built their own training procedure, they are required to invoke the doneTraining and
+ * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#beginTraining} method at appropriate points during
+ * their training process.
+ *
+ * The learner classes typically have a parameter that can be set to change the default feature
+ * pruning threshold to any the user might choose, or it can be set to 0.0 to disable.
+ *
+ * The pruning threshold value is provided by the specific learner, and should be, in one way or
+ * another, parameterized.
+ * @author redman
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
\ No newline at end of file
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java
new file mode 100644
index 00000000..20929909
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java
@@ -0,0 +1,71 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range;
+
+/**
+ * This thread will compute a single activation value; for each layer
+ * setup must be called to provide the output array, the layer and the
+ * input values.
+ * @author redman
+ */
+class ActThread extends PushThread {
+
+ /** the input data. */
+ float[] currentInputs = null;
+
+ /** the layer we are operating on. */
+ Layer layer = null;
+
+ /** the resulting outputs are stored here, this array is shared
+ * by all threads activating on this layer. */
+ float [] layerActivations = null;
+
+ /** used to make the name of the thread unique. */
+ private static int inc = 0;
+
+ /**
+     * Construct an activation thread with a unique, auto-incremented name.
+     * Per-layer state is supplied later via setup.
+ */
+ ActThread() {
+ super("ActThread-"+(inc++));
+ }
+
+ /**
+ * before we start a layer, this is called to set up the thread.
+ * @param ci the input data.
+ * @param l the layer.
+     * @param la the layer activation values.
+     *
+ */
+ void setup(float[] ci, Layer l, float[] la) {
+ this.currentInputs = ci;
+ this.layer = l;
+ this.layerActivations = la;
+ }
+
+ /**
+     * Pull ranges and compute activations until a null range signals shutdown.
+ */
+ public void run() {
+ synchronized (this) {
+ while(true) {
+
+ // wait for the range object to be set.
+ Range r = this.getRange();
+ if (r == null)
+ return;
+ for (int indx = r.start; indx < r.end; indx++) {
+ layerActivations[indx] = layer.computeOneOutput(indx, currentInputs);
+ }
+ }
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java
new file mode 100644
index 00000000..74b25f0a
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java
@@ -0,0 +1,25 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Implementations will activate all the layers of the net and
+ * produce a set of outputs. The one required method will return
+ * all the output values.
+ * @author redman
+ */
+public interface Activator {
+
+ /**
+     * Run the inputs through the layers of the net, returning the final outputs.
+     * Implementations determine how the layers are traversed.
+     * @param inputs the input data.
+ * @return the output values.
+ */
+ public float[] prediction(float[] inputs);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java
new file mode 100644
index 00000000..8237a18b
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java
@@ -0,0 +1,27 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Measure convergence, simplest implementation may simply run some number of epochs,
+ * more sophisticated ones will look for some function of cumulative error going to zero at the
+ * end of an epoch. Convergence is always measured at the end of a training cycle.
+ * @author redman
+ */
+public interface ConvergenceMeasure {
+
+ /**
+ * With the given inputs and outputs, evaluate the results of the last iteration,
+ * determine the error, probably store that, and if convergence (what whatever measure)
+ * is achieved, return true, else return false.
+ *
+     * @param learner the learner being used to train up the neural net, contains the cumulative error.
+ * @return true if converged.
+ */
+ public boolean evaluate(NNTrainingInterface learner);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java
new file mode 100644
index 00000000..acd54807
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java
@@ -0,0 +1,98 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.InputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * @author redman
+ *
+ */
+public class DatasetReader {
+
+ /**
+ * flip the byte order.
+ * @param is input stream.
+ * @return the integer.
+ * @throws IOException
+ */
+ private static int readInt(InputStream is) throws IOException {
+ int i0 = is.read();
+ int i1 = is.read();
+ int i2 = is.read();
+ int i3 = is.read();
+ return (i0<<24) + (i1<<16) + (i2<<8) + i3;
+ }
+
+ /**
+     * get the examples from an NIST dataset, return everything at once. There are
+ * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats =
+ * 47 million floats. These are input examples, so they are image data.
+ * @param filename
+ * @return the input examples.
+ * @throws IOException
+ */
+ public static float[][] getExampleInputs(String filename) throws IOException {
+ InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename)));
+ int m1 = readInt(dis);
+ if (m1 != 2051)
+ throw new IOException("That was not an example file! magic code = "+m1);
+ int numExamples = readInt(dis);
+ if (numExamples != 60000)
+ System.out.println("We expecting 60k examples "+m1);
+ int numRows = readInt(dis);
+ if (numRows != 28)
+ System.out.println("We expecting 28 rows "+numRows);
+ int numColumns = readInt(dis);
+ if (numColumns != 28)
+ System.out.println("We expecting 28 columns "+numColumns);
+ int totalpixels = numRows*numColumns;
+ float [][] examples = new float [numExamples][totalpixels];
+ for (int i = 0 ; i < examples.length; i++) {
+ for (int j = 0; j < totalpixels; j++) {
+ examples[i][j] = (float)(dis.read()/128f) - 1f;
+ }
+ }
+ return examples;
+ }
+
+ /**
+     * get the labels from an NIST dataset, return everything at once. There is
+     * one label byte per example. These are output labels (class values), not
+     * image data; each is scaled into roughly the [-1, 1] range.
+ * @param filename
+ * @return the output examples.
+ * @throws IOException
+ */
+ public static float[][] getExampleOutputs(String filename) throws IOException {
+ InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename)));
+ int m1 = readInt(dis);
+ if (m1 != 2049)
+ throw new IOException("That was not an example file! magic code = "+m1);
+ int numExamples = readInt(dis);
+ float [][] examples = new float [numExamples][1];
+ for (int i = 0 ; i < numExamples; i++) {
+ examples[i][0] = (float)(dis.read()/5f) - 1f;
+ }
+ return examples;
+ }
+
+ /**
+ * @param a
+ * @throws IOException
+ */
+ @SuppressWarnings("unused")
+ public static void main(String[]a) throws IOException {
+ float[][] examples = getExampleInputs("/Users/redman/Desktop/NNTrainingData/train-images-idx3-ubyte");
+ float[][] labels = getExampleOutputs("/Users/redman/Desktop/NNTrainingData/train-labels-idx1-ubyte");
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java
new file mode 100644
index 00000000..a794cf6a
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java
@@ -0,0 +1,42 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Terminate after so many iterations.
+ * @author redman
+ */
+public class EpochConvergenceMeasure implements ConvergenceMeasure {
+
+ /** the current epoch count. */
+ private int epoch = 0;
+
+    /** the maximum number of epochs. */
+ private int max;
+
+ /**
+ * Takes the number of iterations.
+ * @param m the max iterations.
+ */
+ public EpochConvergenceMeasure(int m) {
+ this.max = m;
+ }
+
+ /**
+ * @see edu.illinois.cs.cogcomp.lbjava.neuralnet.ConvergenceMeasure#evaluate(edu.illinois.cs.cogcomp.lbjava.neuralnet.NNTrainingInterface)
+ */
+ @Override
+ public boolean evaluate(NNTrainingInterface learner) {
+ epoch++;
+ if (epoch > max) {
+ return true;
+ } else
+ return false;
+ }
+
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java
new file mode 100644
index 00000000..29adfeb0
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java
@@ -0,0 +1,277 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.io.PrintStream;
+import java.util.Random;
+
+/**
+ * This is a layer in a neural net. it is characterized by a number of inputs
+ * and a number of outputs. The neurons (perceptrons) are hidden within, I see
+ * no reason to expose them. this allows the layer class to do all computations
+ * across the entire layer in one pass, which is very efficient. Downside; Nothing
+ * in this implementation will allow you to assign per neuron attributes. Also, the
+ * weights are represented by a primitive array, so only 32 bit indices meaning no
+ * more than 2 ^ 32 weights are allowed.
+ * @author redman
+ */
+public class Layer {
+
+ /** number of inputs to this layer. */
+ private int numberInputs;
+
+ /** the number of outputs from this layer. */
+ private int numberOutputs;
+
+ /** the neuron weights. */
+ private float[] weights;
+
+ /** the derived outputs. */
+ private float[] dweights;
+
+ /** collects output values. */
+ private float[] outputs;
+
+ /**
+ * The layer constructed with a default random number source.
+ * @param numIn the number of inputs.
+ * @param numOut the number of outputs.
+ */
+ public Layer(int numIn, int numOut) {
+ // the delegated constructor already allocates the outputs buffer.
+ this(numIn, numOut, new Random());
+ }
+
+ /**
+ * The layer constructed.
+ * @param numIn the number of inputs.
+ * @param numOut the number of outputs.
+ * @param r the random num generator.
+ */
+ public Layer(int numIn, int numOut, Random r) {
+ this.numberInputs = numIn;
+ this.numberOutputs = numOut;
+ int wl = (numIn+1)*numOut;
+ weights = new float[wl];
+ dweights = new float[wl];
+ for (int i = 0; i < wl; i++)
+ weights [i] = (r.nextFloat() - 0.5f) * 4f;
+ outputs = new float[numOut];
+ }
+
+ /**
+ * Compute the logistic sigmoid activation (not its derivative).
+ * @param x the input value
+ * @return the sigmoid
+ */
+ final private float sigmoid(float x) {
+ return (float) (1.0 / (1.0 + Math.exp(-x)));
+ }
+
+ /**
+ * @return the weights
+ */
+ public float[] getWeights() {
+ return weights;
+ }
+
+ /**
+ * @param weights the weights to set
+ */
+ public void setWeights(float[] weights) {
+ this.weights = weights;
+ }
+
+ /**
+ * @return the numberInputs
+ */
+ public int getNumberInputs() {
+ return numberInputs;
+ }
+
+ /**
+ * @param numberInputs the numberInputs to set
+ */
+ public void setNumberInputs(int numberInputs) {
+ this.numberInputs = numberInputs;
+ }
+
+ /**
+ * @return the numberOutputs
+ */
+ public int getNumberOutputs() {
+ return numberOutputs;
+ }
+
+ /**
+ * @param numberOutputs the numberOutputs to set
+ */
+ public void setNumberOutputs(int numberOutputs) {
+ this.numberOutputs = numberOutputs;
+ }
+
+ /**
+ * Compute the activation for one output neuron; broken out this
+ * finely only so the work can be parallelized across threads.
+ * @param index the index of the output neuron to compute.
+ * @param inputs the inputs.
+ * @return the activation output.
+ */
+ final float computeOneOutput(int index, float[] inputs) {
+ float result = 0.0f;
+ int nI = this.numberInputs;
+ int start = index * (nI+1);
+ for (int k = 0 ; k < nI ; k++) {
+ result += weights[start+k] * inputs[k];
+ }
+ result += weights[start+nI]; // bias weight, implicit input of 1.
+ return sigmoid(result);
+ }
+
+ /**
+ * Given a set of inputs, produce the set of activation values. Note the
+ * returned array is this layer's shared buffer, overwritten on the next call.
+ * @param inputs the inputs to produce the predictions for.
+ * @return the set of predictions.
+ */
+ final public float[] activate(float[] inputs) {
+ int nO = this.numberOutputs;
+ float[] o = this.outputs;
+ for (int j = 0 ; j < nO ; j++) {
+ o[j] = this.computeOneOutput(j, inputs);
+ }
+ return outputs;
+ }
+
+ /**
+ * train up weights for just one output. Thread safety must be noted here, since everybody will be
+ * updating the nextError array at the same time. To avoid doing repeated synchronizations which are
+ * expensive here, for multithreaded trainer, we pass in a dummy error array, update at will, then
+ * the caller is responsible for synchronizing on the real one and updating the shared sum error array.
+ * @param error the activation errors used to compute the backprop value.
+ * @param input the input data.
+ * @param output the computed output data.
+ * @param learningRate the learning rate.
+ * @param momentum the momentum.
+ * @param nextError the array where the backpropagated error values will be accumulated.
+ * @param outIndex the index of the output neuron to train.
+ */
+ final public void trainOne(float[] error, float[] input, float[] output, float learningRate, float momentum, float[] nextError, int outIndex) {
+ int woffset = (this.numberInputs+1) * outIndex;
+ float d = error[outIndex] * (output[outIndex] * (1 - output[outIndex]));
+ for (int j = 0; j < this.numberInputs; j++) {
+ int windx = woffset + j;
+ nextError[j] += weights[windx] * d;
+ float dw = input[j] * d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+
+ // compute the error for the bias, the fake bias input is always 1.
+ int windx = woffset + input.length;
+ nextError[input.length] += weights[windx] * d;
+ float dw = d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+
+ /**
+ * Given a set of errors (errors from the next layer on), adjust the
+ * weights to perform one gradient descent step.
+ * @param error the output errors.
+ * @param input the input data.
+ * @param output the computed activation output of this layer (not the labels).
+ * @param learningRate the rate of learning.
+ * @param momentum helps to avoid local minima.
+ * @return the backpropagated errors from this layer, one per input plus bias.
+ */
+ final public float[] train(float[] error, float[] input, float[] output, float learningRate, float momentum) {
+ int nI = this.numberInputs+1/*for the bias*/;
+ float[] nextError = new float[nI];
+ for (int i = 0; i < this.numberOutputs; i++) {
+ //this.trainOne(error, input, output, learningRate, momentum, nextError, i);
+
+ int woffset = nI * i;
+ float d = error[i] * (output[i] * (1 - output[i]));
+ for (int j = 0; j < this.numberInputs; j++) {
+ int windx = woffset + j;
+ nextError[j] += weights[windx] * d;
+ float dw = input[j] * d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+
+ // compute the error for the bias, the fake bias input is always 1.
+ int windx = woffset + input.length;
+ nextError[input.length] += weights[windx] * d;
+ float dw = d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+ return nextError;
+ }
+
+ /**
+ * print out the weights.
+ */
+ public void print() {
+ System.out.print(this.numberInputs+":"+this.numberOutputs);
+ System.out.print(" ");
+ for (float w : weights) {
+ System.out.format(" %.8f",w);
+ }
+ System.out.print(" (");
+ for (float w : dweights) {
+ System.out.format(" %.8f",w);
+ }
+ System.out.println(")");
+ }
+
+ /**
+ * @return the dweights
+ */
+ public float[] getDweights() {
+ return dweights;
+ }
+
+ /**
+ * @param dweights the dweights to set
+ */
+ public void setDweights(float[] dweights) {
+ this.dweights = dweights;
+ }
+
+ /**
+ * used for reporting mostly; each row is one neuron's weights plus its bias.
+ */
+ public String toString() {
+ StringBuffer sb = new StringBuffer();
+ sb.append("in : "+this.numberInputs+" out : "+this.numberOutputs);
+ sb.append("\n");
+ for (int i = 0; i < weights.length;) {
+ for (int j = 0; j < this.numberInputs+1;j++,i++) {
+ sb.append(" "+weights[i]);
+ sb.append(" ");
+ }
+ sb.append("\n");
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Write the representation one value per line (bare print() would fuse the numbers).
+ * @param out the output stream for serialization.
+ */
+ public void write(PrintStream out) {
+ out.println(numberInputs);
+ out.println(numberOutputs);
+ out.println(weights.length);
+ for (int i = 0; i < weights.length; ++i)
+ out.println(weights[i]);
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java
new file mode 100644
index 00000000..d3568330
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java
@@ -0,0 +1,96 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.util.Arrays;
+
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range;
+
+/**
+ * This thread will compute a single activtion value, for each layer
+ * setup must be called to provide the output array, the layer and the
+ * input values.
+ * @author redman
+ */
+class LearnerThread extends PushThread {
+
+ /** the input error from the next layer being back propogated. */
+ float[] error = null;
+
+ /** the input labeled data. */
+ float[] input = null;
+
+ /** the input data. */
+ float[] output = null;
+
+ /** the result error SHARED ACROSS THREADS, must be synced to update. */
+ float [] nextError;
+
+ /** the space where updates to the errors will be set, later used to update nextError.*/
+ float [] errorWorkspace;
+
+ /** the learning rate. */
+ float learnRate;
+
+ /** the momentum. */
+ float momentum;
+
+ /** the layer we are operating on. */
+ Layer layer = null;;
+
+ /** the unique id. */
+ private static int inc = 0;
+
+ /**
+ * The learning rate and momentum will not change, so we will take them initially.
+ * @param lR the learning rate.
+ * @param m the momentum.
+ * @param mux the multiplexer.
+ */
+ LearnerThread(float lR, float m) {
+ super("LearnerThread-"+(inc++));
+ this.learnRate = lR;
+ this.momentum = m;
+ }
+
+ /**
+ * before we start a layer, this is called to set up the thread.
+ * @param error the error from the next layer, used to calc this layers error.
+ * @param input the input data.
+ * @param output the result data.
+ * @param nextError put the next layers input error here.
+ * @param layer the layer we operate on.
+ */
+ void setup(float [] error, float [] input, float [] output, float[] nextError, Layer layer) {
+ this.error = error;
+ this.input = input;
+ this.output = output;
+ this.nextError = nextError;
+ this.layer = layer;
+ // a freshly allocated array is already zeroed by the JVM; no explicit fill needed.
+ this.errorWorkspace = new float[nextError.length];
+ }
+
+ /**
+ * Train each assigned neuron range until a null range signals shutdown.
+ */
+ public void run() {
+ synchronized (this) {
+ while(true) {
+
+ // block until a range arrives; null means done() was called.
+ Range r = this.getRange();
+ if (r == null)
+ return;
+ for (int indx = r.start; indx < r.end; indx++) {
+ layer.trainOne(error, input, output, learnRate, momentum, errorWorkspace, indx);
+ }
+ }
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java
new file mode 100644
index 00000000..84e29922
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java
@@ -0,0 +1,23 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Contract implemented by all neural-net trainers.
+ * @author redman
+ */
+public interface NNTrainingInterface {
+ /**
+ * Given a set of examples, and a set of desired outputs, train the network
+ * represented by the provided network layers for the provided number of epochs.
+ * @param inputs the input data to train against.
+ * @param outputs the desired outputs.
+ * @param epochs the number of training iterations to run.
+ */
+ public void train(float[][] inputs, float[][]outputs, int epochs);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java
new file mode 100644
index 00000000..9dada667
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java
@@ -0,0 +1,84 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range;
+
+/**
+ * Threads will operate on a range, this superclass contains that
+ * range and handles atomic synchronized access.
+ * @author redman
+ */
+public class PushThread extends Thread {
+
+ /** the range to operate on. */
+ protected Range range = null;
+
+ /** set when this thread is waiting for input. */
+ private boolean idle = false;
+ /**
+ * the push thread takes the name ofthe thread, to pass to
+ * the super.
+ * @param name the name of the thread.
+ */
+ PushThread(String name) {
+ super(name);
+ }
+
+ /**
+ * set the range of things to operate on.
+ * @param range
+ */
+ synchronized void setRange(Range range) {
+ this.range = range;
+ this.notifyAll();
+ }
+
+ /**
+ * Shut this thread down: clear any pending range and interrupt getRange().
+ */
+ synchronized void done() {
+ this.range = null;
+ this.interrupt();
+ }
+
+ /**
+ * wait for the thread to complete it's run, it will set
+ * poised and block till it gets data.
+ */
+ final synchronized public void waitIdle() {
+ while(!idle || range != null)
+ try {
+ this.wait();
+ } catch (InterruptedException e) {
+ }
+ }
+
+ /**
+ * wait for the next range.
+ * @return the next range, or null if this thread was shut down.
+ */
+ final synchronized protected Range getRange() {
+ while (range == null)
+ try {
+ this.idle = true;
+ this.notify(); // somebody waiting for completion?
+ this.wait();
+ } catch (InterruptedException e) {
+ // wait() clears the interrupt flag before throwing, so testing
+ // isInterrupted() here would always be false. Interrupts are only
+ // delivered by done(), so treat one as the shutdown signal.
+ return null;
+ }
+ Range r = range;
+ range = null;
+ this.idle = false;
+ return r;
+ }
+
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java
new file mode 100644
index 00000000..951c3144
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java
@@ -0,0 +1,150 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.util.Random;
+
+/**
+ * This class will simply learn up the NeuralNet layers, single threaded.
+ * @author redman
+ */
+public class SimpleNNTrainer implements NNTrainingInterface {
+
+ /** the layers of the neural network. */
+ private Layer[] layers;
+
+ /** scales the weight deltas for each iteration. */
+ private float learningRate = .3f;
+
+ /** this prevents local minimum capture. */
+ private float momentum = .6f;
+
+ /**
+ * Need the layer data, learning rate and momentum.
+ * @param l the layers of the neural net.
+ * @param rate the learning rate.
+ * @param mom the momentum.
+ */
+ public SimpleNNTrainer(Layer[] l, float rate, float mom) {
+ this.layers = l;
+ this.learningRate = rate;
+ this.momentum = mom;
+ }
+
+ /**
+ * Given one input example, run a forward pass through every layer.
+ * Unlike activate(), only the final layer's outputs (the network's
+ * prediction) are returned; intermediate activations are discarded.
+ * @param inputs the inputs.
+ * @return the output layer's activation energies.
+ */
+ public final float [] classify(float[] inputs) {
+
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = new float[layerCount][];
+
+ // This array contains inputs from previous layer
+ float [] currentinputs = inputs;
+ for (int i = 0 ; i < layerCount ; i++) {
+
+ // compute the activations for this layer.
+ Layer layer = layers[i];
+ activations[i] = layer.activate(currentinputs);
+ currentinputs = activations[i];
+ }
+ return activations[layerCount-1];
+ }
+
+ /**
+ * Given one input example, compute the output values of every layer;
+ * the network's final prediction is the last vector in the returned
+ * array. Each vector is its layer's shared buffer (overwritten per call).
+ * @param inputs the inputs.
+ * @return the activation energies from all layers.
+ */
+ public final float [][] activate(float[] inputs) {
+
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = new float[layerCount][];
+
+ // This array contains inputs from previous layer
+ float [] currentinputs = inputs;
+ for (int i = 0 ; i < layerCount ; i++) {
+
+ // compute the activations for this layer.
+ Layer layer = layers[i];
+ activations[i] = layer.activate(currentinputs);
+ currentinputs = activations[i];
+ }
+ return activations;
+ }
+
+ /**
+ * Train with a single example: one forward pass, then backprop
+ * through every layer adjusting weights in place.
+ * @param inputs input data.
+ * @param outputs the labeled data.
+ */
+ public void train(float[] inputs, float[]outputs) {
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = this.activate(inputs);
+
+ // now we have all the activations.
+ float[] calcOut = activations[activations.length-1];
+ int errlen = calcOut.length;
+ float [] error = new float[errlen];
+ for (int i = 0; i < errlen; i++) {
+ error[i] = outputs[i] - calcOut[i]; // negative error
+ }
+ for (int i = layers.length - 1; i > 0; i--) {
+ error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum);
+ }
+ error = layers[0].train(error,inputs, activations[0], this.learningRate, this.momentum);
+ }
+ /**
+ * Execute the given number of epochs, then exit whatever the error.
+ * @param inputs the input examples.
+ * @param outputs the labels.
+ * @param epochs the number of passes made over the training data.
+ */
+ @Override
+ public void train(float[][] inputs, float[][]outputs, int epochs) {
+
+ // error checking.
+ if (inputs.length != outputs.length)
+ throw new RuntimeException("There must be the same number of input data records and output data records.");
+ int totalInputs = inputs.length;
+
+ // set up our counts.
+ int layerCount = layers.length;
+ Random r = new Random(34565);
+ for (int epoch = 0; epoch < epochs; epoch++) {
+ for (int inindx = 0; inindx < totalInputs; inindx++) {
+ int iI = r.nextInt(totalInputs);
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = this.activate(inputs[iI]);
+
+ // now we have all the activations.
+ float[] calcOut = activations[layerCount-1];
+ int errlen = calcOut.length;
+ float [] error = new float[errlen];
+ for (int i = 0; i < errlen; i++) {
+ error[i] = outputs[iI][i] - calcOut[i]; // negative error
+ }
+ for (int i = layers.length - 1; i > 0; i--) {
+ error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum);
+ }
+ error = layers[0].train(error,inputs[iI],activations[0], this.learningRate, this.momentum);
+ }
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java
new file mode 100644
index 00000000..01980fb2
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java
@@ -0,0 +1,338 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * This class will simply learn up the NeuralNet layers, single threaded.
+ * @author redman
+ */
+public class ThreadedNNTrainer implements NNTrainingInterface {
+
+ /** the layers of the neural network. */
+ private Layer[] layers;
+
+ /** scales the weight deltas for each iteration. */
+ private float learningRate = .3f;
+
+ /** this prevents local minimum capture. */
+ private float momentum = .6f;
+
+ /** this is the number of threads we will use, by default, number of processors on the machine. */
+ private int numThreads = Runtime.getRuntime().availableProcessors();
+
+ /**
+ * Need the layer data, learning rate and momentum.
+ * @param l the layers of the neural net.
+ * @param rate the learning rate.
+ * @param mom the momentum.
+ */
+ public ThreadedNNTrainer(Layer[] l, float rate, float mom) {
+ this.layers = l;
+ this.learningRate = rate;
+ this.momentum = mom;
+ }
+
+ /**
+ * Need the layer data, learning rate and momentum.
+ * @param l the layers of the neural net.
+ * @param rate the learning rate.
+ * @param mom the momentum.
+ * @param numThreads number of threads to deploy.
+ */
+ public ThreadedNNTrainer(Layer[] l, float rate, float mom, int numThreads) {
+ this.layers = l;
+ this.learningRate = rate;
+ this.momentum = mom;
+ this.numThreads = numThreads;
+ }
+
+ /**
+ * Given one input example, compute the output values of every layer;
+ * the network's final prediction is the last vector in the returned
+ * array. Each vector is its layer's shared buffer (overwritten per call).
+ * @param inputs the inputs.
+ * @return the activation energies from all layers.
+ */
+ public final float [][] activate(float[] inputs) {
+
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = new float[layerCount][];
+
+ // This array contains inputs from previous layer
+ float [] currentinputs = inputs;
+ for (int i = 0 ; i < layerCount ; i++) {
+
+ // compute the activations for this layer.
+ Layer layer = layers[i];
+ activations[i] = layer.activate(currentinputs);
+ currentinputs = activations[i];
+ }
+ return activations;
+ }
+
+ /**
+ * Execute the given number of epochs, then exit whatever the error.
+ * @param inputs the input examples.
+ * @param outputs the labels.
+ * @param epochs the number of iterations to perform.
+ */
+ @Override
+ final public void train(float[][] inputs, float[][] outputs, int epochs) {
+ // error checking.
+ if (inputs.length != outputs.length)
+ throw new RuntimeException("There must be the same number of input data records and output data records.");
+
+ // iterate this number of times.
+ int numExamples = inputs.length;
+
+ // For each layer, compute the ranges of indices to operate on. This will allow us to
+ // continue computing on a thread without handshakes.
+ int ll = layers.length;
+ Range[][] ranges = new Range[ll][];
+ for (int i = 0; i < ll ; i++) {
+ Layer l = layers[i];
+ int no = l.getNumberOutputs();
+ int increment = no / numThreads;
+ int onsies;
+ if (increment == 0) {
+ onsies = no;
+ ranges[i] = new Range[onsies];
+ } else {
+ onsies = no % numThreads;
+ ranges[i] = new Range[numThreads];
+ }
+ int start = 0;
+ for (int j = 0 ; j < ranges[i].length && start < no; j++) {
+ int end = start + increment;
+ if (onsies != 0) {
+ end++;
+ onsies--;
+ }
+ ranges[i][j] = new Range(start, end);
+ start = end;
+ }
+ }
+
+ // create the threads to run against the activation mux.
+ ActThread[] actThreads = new ActThread[numThreads];
+ for (int i = 0; i < numThreads; i++) {
+ actThreads[i] = new ActThread();
+ actThreads[i].start();
+ }
+
+ // create the threads to run against the activation mux.
+ LearnerThread[] learnerThreads = new LearnerThread[numThreads];
+ for (int i = 0; i < numThreads; i++) {
+ learnerThreads[i] = new LearnerThread(this.learningRate, this.momentum);
+ learnerThreads[i].start();
+ }
+
+ // set up our counts.
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float[][] activations = new float[layerCount][];
+ for (int i = 0; i < layerCount; i++) {
+ activations[i] = new float[layers[i].getNumberOutputs()];
+ }
+
+ Thread.yield();
+ Thread.yield();
+ Thread.yield();
+ Random r = new Random(34565);
+
+ // do the specified number of epochs.
+ for (int epoch = 0; epoch < epochs; epoch++) {
+ for (int inindx = 0; inindx < numExamples; inindx++) {
+ int iI = r.nextInt(numExamples);
+
+ // zero activations
+ for (int i = 0; i < layerCount; i++) {
+ Arrays.fill(activations[i], 0.0f);
+ }
+
+ // This array contains inputs from previous layer output
+ float[] currentinputs = inputs[iI];
+
+ // for each layer, do the activations.
+ for (int i = 0; i < layerCount; i++) {
+ Layer layer = layers[i];
+
+ // set up the threads
+ float[] acts = activations[i];
+ int rl = ranges[i].length;
+ for (int j = 0; j < rl; j++) {
+ actThreads[j].setup(currentinputs, layer, acts);
+ actThreads[j].setRange(ranges[i][j]);
+ }
+
+ // wait for them to finish.
+ for (int j = 0; j < rl; j++) {
+ actThreads[j].waitIdle();
+ }
+ currentinputs = acts;
+ }
+
+ //////////////////////////////////
+ // compute output errors.
+ // now we have all the activations, lets do error propogation.
+ float[] calcOut = activations[layerCount - 1];
+ int errlen = calcOut.length;
+ float[] error = new float[errlen];
+ for (int i = 0; i < errlen; i++) {
+ error[i] = outputs[iI][i] - calcOut[i]; // negative error
+ }
+
+ //////////////////////////////////
+ // propogate the errors back and adjust weights.
+ // now learn from out errors.
+ for (int i = layerCount - 1; i > 0; i--) {
+ Layer layer = layers[i];
+ int nI = layer.getNumberInputs() + 1/*for the bias*/;
+ float[] nextError = new float[nI];
+
+ // set up the threads
+ int rl = ranges[i].length;
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].setup(error, activations[i - 1], activations[i], nextError, layer);
+ learnerThreads[j].setRange(ranges[i][j]);
+ }
+
+ // wait for complete, then set up next layer run.
+ // wait for them to finish.
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].waitIdle();
+ }
+
+ // now we must sum all the errors for each of the threads.
+ int esize = nextError.length;
+ for (int ei = 0; ei < esize; ei++) {
+ for (int j = 0; j < rl; j++) {
+ nextError[ei] += learnerThreads[j].errorWorkspace[ei];
+ }
+ }
+ error = nextError;
+ }
+
+ // The setup for the first layer is computed using the actual inputs, so we do this
+ // a bit differently.
+ Layer layer = layers[0];
+ int rl = ranges[0].length;
+ int nI = layer.getNumberInputs() + 1/*for the bias*/;
+ float[] nextError = new float[nI];
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].setup(error, inputs[iI], activations[0], nextError, layer);
+ learnerThreads[j].setRange(ranges[0][j]);
+ }
+
+ // wait for complete, then set up next layer run.
+ // wait for them to finish.
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].waitIdle();
+ }
+ }
+
+ // check for convergence.
+ float sumerr = 0;
+ for (int inputIdx = 0; inputIdx < outputs.length; inputIdx++) {
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] a = this.activate(inputs[inputIdx]);
+ float[] outs = a[layerCount-1];
+ float pred = outs[0];
+ float label = outputs[inputIdx][0];
+ sumerr = pred > label ? pred - label : label - pred;
+ }
+ System.out.format("%d) error = %.18f\n",epoch,(sumerr/(float)outputs.length));
+ }
+ }
+
+ /** A half-open [start, end) range of output-neuron indices for one worker. */
+ static class Range {
+ int start;
+ int end;
+ Range(int s, int e) {
+ start = s;
+ end = e;
+ }
+ public String toString() {
+ return start+"-"+end;
+ }
+ }
+
+ /**
+ * this class coordinates the activities of a set of threads by handing
+ * out indexes that need to be operated on in a threadsafe way. If a request
+ * is made for an index, and none are available, the thread will wait until notified.
+ * @author redman
+ */
+ static class Multiplexer {
+
+ /** these are the ranges for the layer we operate on, these inited once and reused each epoch. */
+ private Range[] ranges = null;
+
+ /** index of the next range to hand out (counts up through ranges). */
+ private int count = 0;
+
+ /** number of threads currently blocked waiting for work. */
+ private int waiting = 0;
+
+ /** the number of threads sharing this multiplexer. */
+ private int numThreads = 0;
+
+ /**
+ * We need the number of elements in the layer to operate on.
+ * @param numThreads the total number of threads.
+ */
+ Multiplexer(int numThreads) {
+ this.numThreads = numThreads;
+ }
+
+ /**
+ * Start this process. This should be called by the main thread where
+ * coordination occurs. Hands the ranges to the workers and then blocks
+ * until every worker thread is waiting again (all ranges consumed).
+ * @param ranges the range of indices to operate on.
+ */
+ synchronized void startAndWait(Range[] ranges) {
+ this.count = 0;
+ this.ranges = ranges;
+ this.waiting = 0;
+ this.notifyAll();
+ while (waiting != numThreads) {
+ try {
+ this.wait();
+ } catch (InterruptedException e1) {
+ }
+ }
+ }
+
+ /**
+ * get the next available range, or block till one is available.
+ * @return the next range.
+ */
+ synchronized Range getNextIndex() {
+ while (ranges == null || count == ranges.length) {
+ try {
+ this.waiting++;
+ if (waiting == numThreads)
+ this.notifyAll();
+ this.wait();
+ this.waiting--;
+ } catch (InterruptedException e) {
+ }
+ }
+ return ranges[count++];
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java
new file mode 100644
index 00000000..c8623369
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java
@@ -0,0 +1,21 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+/**
+ * This package contains the Neural Network implementation employed by LBJava. This
+ * implementation supports bias, momentum and back prop, and is designed with
+ * efficiency in mind. The implementation contract includes an API for trainers
+ * {@link NNTrainingInterface} that defines the API for any trainer. A single
+ * threaded trainer is provided. There is also a multithreaded trainer, which helps
+ * when there are a very large number of weights between layers.
+ *
+ * There is also a {@link Layer} class which implements functionality specific
+ * to neural net layers within the system. However, there is no representation of a
+ * neuron within the API; this was decided upon to ensure good performance.
+ * @author redman
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java
index 67545bf3..b530eb9a 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java
@@ -7,6 +7,7 @@
*/
package edu.illinois.cs.cogcomp.lbjava.util;
+import java.lang.reflect.Array;
import java.util.Arrays;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream;
@@ -190,6 +191,32 @@ public Feature remove(int i) {
}
+ /**
+ * Remove all the features specified by the indices. This is MUCH faster
+ * than removing them one at a time.
+ *
+ * @param indexes The indexes of the elements to remove.
+ **/
+ public void remove(int[] indexes) {
+ Arrays.sort(indexes);
+ int sourceindex = 0;
+ int discardindex = 0;
+ for (int targetindex = 0; targetindex < size; targetindex++) {
+ if (discardindex < indexes.length && targetindex == indexes[discardindex]) {
+ // skip this one (by simply not copying it and not incrementing sourceindex), increment discardindex
+ discardindex++;
+ } else {
+ vector[sourceindex] = vector[targetindex];
+ sourceindex++;
+ }
+ }
+ if (discardindex != indexes.length)
+ // this should never happen.
+ throw new RuntimeException("There was a problem removing some of the indexes!");
+ size -= indexes.length;
+ }
+
+
/** Returns the value of {@link #size}. */
public int size() {
return size;
diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java
new file mode 100644
index 00000000..e5202d51
--- /dev/null
+++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java
@@ -0,0 +1,20 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+public class SparseNetworkLearningPruneTest {
+
+ @Test
+ public void test() {
+ }
+
+}
diff --git a/pom.xml b/pom.xml
index 19dea5ef..db0a0881 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
edu.illinois.cs.cogcomp
lbjava-project
pom
- 1.2.26
+ 1.3.1
lbjava
@@ -33,11 +33,11 @@
CogcompSoftware
CogcompSoftware
- scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo
+ scp://legolas.cs.illinois.edu:/srv/data/cogcomp/html/m2repo
CogcompSoftwareDoc
- scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/software/doc/${project.artifactId}
+ scp://legolas.cs.illinois.edu:/srv/data/cogcomp/html/software/doc/