diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java
index ab0dd37c..15a28665 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java
@@ -880,53 +880,57 @@ public void run() {
if (!lce.onlyCodeGeneration) {
// If there's a "from" clause, train.
try {
- if (lce.parser != null) {
- System.out.println("Training " + getName());
- if (preExtract) {
- preExtractAndPrune();
- System.gc();
- } else
- learner.saveLexicon();
- int trainingRounds = 1;
-
- if (tuningParameters) {
- String parametersPath = getName();
- if (Main.classDirectory != null)
- parametersPath =
- Main.classDirectory + File.separator + parametersPath;
- parametersPath += ".p";
-
- Learner.Parameters bestParameters = tune();
- trainingRounds = bestParameters.rounds;
- Learner.writeParameters(bestParameters, parametersPath);
- System.out.println(" " + getName()
- + ": Training on entire training set");
- } else {
- if (lce.rounds != null)
- trainingRounds = Integer.parseInt(((Constant) lce.rounds).value);
-
- if (lce.K != null) {
- int[] rounds = {trainingRounds};
- int k = Integer.parseInt(lce.K.value);
- double alpha = Double.parseDouble(lce.alpha.value);
- trainer.crossValidation(rounds, k, lce.splitPolicy, alpha,
- testingMetric, true);
+ learner.beginTraining();
+ try {
+ if (lce.parser != null) {
+ System.out.println("Training " + getName());
+ if (preExtract) {
+ preExtractAndPrune();
+ System.gc();
+ } else
+ learner.saveLexicon();
+ int trainingRounds = 1;
+
+ if (tuningParameters) {
+ String parametersPath = getName();
+ if (Main.classDirectory != null)
+ parametersPath =
+ Main.classDirectory + File.separator + parametersPath;
+ parametersPath += ".p";
+
+ Learner.Parameters bestParameters = tune();
+ trainingRounds = bestParameters.rounds;
+ Learner.writeParameters(bestParameters, parametersPath);
System.out.println(" " + getName()
+ ": Training on entire training set");
+ } else {
+ if (lce.rounds != null)
+ trainingRounds = Integer.parseInt(((Constant) lce.rounds).value);
+
+ if (lce.K != null) {
+ int[] rounds = {trainingRounds};
+ int k = Integer.parseInt(lce.K.value);
+ double alpha = Double.parseDouble(lce.alpha.value);
+ trainer.crossValidation(rounds, k, lce.splitPolicy, alpha,
+ testingMetric, true);
+ System.out.println(" " + getName()
+ + ": Training on entire training set");
+ }
}
- }
-
- trainer.train(lce.startingRound, trainingRounds);
-
- if (testParser != null) {
- System.out.println("Testing " + getName());
- new Accuracy(true).test(learner, learner.getLabeler(), testParser);
- }
-
- System.out.println("Writing " + getName());
- } else
- learner.saveLexicon(); // Writes .lex even if lexicon is empty.
-
+ trainer.train(lce.startingRound, trainingRounds);
+ } else
+ learner.saveLexicon(); // Writes .lex even if lexicon is empty.
+ } finally {
+ learner.doneTraining();
+ }
+
+ if (lce.parser != null && testParser != null) {
+ System.out.println("Testing " + getName());
+ new Accuracy(true).test(learner, learner.getLabeler(), testParser);
+ }
+
+ // save the final model.
+ System.out.println("Writing " + getName());
learner.save(); // Doesn't write .lex if lexicon is empty.
} catch (Exception e) {
System.err.println("LBJava ERROR: Exception while training " + getName() + ":");
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
index 5705301a..1728143b 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
@@ -66,6 +66,9 @@ public abstract class Learner extends Classifier {
/** The number of candidate examples when a global object is passed here. */
protected int candidates = 1;
+
+ /** True while a training session (between beginTraining and doneTraining) is in progress. */
+ protected boolean intraining = false;
/**
* This constructor is used by the LBJava compiler; it should never be called by a programmer.
@@ -259,7 +262,6 @@ public URL getModelLocation() {
return lcFilePath;
}
-
/**
* Sets the location of the lexicon as a regular file on this file system.
*
@@ -289,7 +291,6 @@ public URL getLexiconLocation() {
return lexFilePath;
}
-
/**
* Establishes a new feature counting policy for this learner's lexicon.
*
@@ -304,7 +305,6 @@ public void countFeatures(Lexicon.CountPolicy policy) {
lexicon.countFeatures(policy);
}
-
/**
* Returns this learner's feature lexicon after discarding any feature counts it may have been
* storing. This method is likely only useful when the lexicon and its counts are currently
@@ -320,7 +320,6 @@ public Lexicon getLexiconDiscardCounts() {
return lexicon;
}
-
/**
* Returns a new, emtpy learner into which all of the parameters that control the behavior of
* the algorithm have been copied. Here, "emtpy" means no learning has taken place.
@@ -331,7 +330,6 @@ public Learner emptyClone() {
return clone;
}
-
/**
* Trains the learning algorithm given an object as an example. By default, this simply converts
* the example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])}
@@ -345,7 +343,6 @@ public void learn(Object example) {
(double[]) exampleArray[3]);
}
-
/**
* Trains the learning algorithm given a feature vector as an example. This simply converts the
* example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])}.
@@ -633,6 +630,15 @@ public double realValue(int[] f, double[] v) {
+ getClass().getName() + "'.");
}
+ /**
+ * Begin a training session. Training may involve building many models,
+ * e.g. for cross validation or parameter tuning.
+ **/
+ public void beginTraining() {
+ intraining = true;
+ }
+
+
/**
* Overridden by subclasses to perform any required post-processing computations after all
@@ -642,6 +648,21 @@ public double realValue(int[] f, double[] v) {
public void doneLearning() {}
+ /**
+ * Overridden by subclasses to perform any required post-training computations and optimizations,
+ * in particular, feature subset reduction. This default method only enforces the training lifecycle.
+ */
+ public void doneTraining() {
+ if (intraining) {
+ intraining = false;
+ } else {
+ throw new RuntimeException("calling doneTraining without previously calling beginTraining"
+ + " violates the lifecycle contract. Or perhaps the subclass does not call the superclass "
+ + "method. Contact the developer.");
+ }
+ }
+
+
/**
* This method is sometimes called before training begins, although it is not guaranteed to be
* called at all. It allows the number of examples and number of features to be passed to the
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java
index 1941b64f..bb55b6a7 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java
@@ -9,12 +9,15 @@
import java.io.Serializable;
import java.net.URL;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.*;
+import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature;
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature;
import edu.illinois.cs.cogcomp.lbjava.util.ByteString;
import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -305,7 +308,7 @@ public boolean contains(Feature f) {
*
* @param f The feature to look up.
* @return The integer key that the feature maps to.
- **/
+ **/
public int lookup(Feature f) {
return lookup(f, false, -1);
}
@@ -661,6 +664,36 @@ public void discardPrunedFeatures() {
pruneCutoff = -1;
}
+ /**
+ * Discard features at the provided indices. This operation is performed
+ * last to first so we can do it in place. This method will sort the input
+ * array.
+ * @param dumpthese the indexes of the features to dump.
+ */
+ public void discardPrunedFeatures(int [] dumpthese) {
+ Arrays.sort(dumpthese);
+ lexiconInv.remove(dumpthese);
+
+ // this compresses the FVector
+ lexiconInv = new FVector(lexiconInv);
+ if (lexicon != null) {
+
+ // reconstitute the lexicon.
+ lexicon.clear();
+ for (int i = 0; i < lexiconInv.size();i++) {
+ lexicon.put(lexiconInv.get(i), new Integer(i));
+ }
+
+ // sanity check, make sure the indices in the lexicon map matches the index in the feature vector
+ for (int i = 0; i < lexiconInv.size();i++) {
+ if (i != ((Integer)lexicon.get(lexiconInv.get(i))).intValue()) {
+ throw new RuntimeException("After optimization pruning, the index in the lexicon did "
+ + "not match the inverted index.");
+ }
+ }
+ }
+ }
+
/**
* Returns a deep clone of this lexicon implemented as a HashMap.
@@ -742,10 +775,9 @@ public int compare(int i1, int i2) {
ByteString previousBSIdentifier = null;
out.writeInt(indexes.length);
out.writeInt(pruneCutoff);
-
for (int i = 0; i < indexes.length; ++i) {
Feature f = inverse.get(indexes[i]);
- previousClassName =
+ previousClassName =
f.lexWrite(out, this, previousClassName, previousPackage, previousClassifier,
previousSIdentifier, previousBSIdentifier);
previousPackage = f.getPackage();
@@ -757,7 +789,6 @@ else if (f.hasByteStringIdentifier())
out.writeInt(indexes[i]);
}
-
if (featureCounts == null)
out.writeInt(0);
else
@@ -801,14 +832,12 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
pruneCutoff = in.readInt();
lexicon = null;
lexiconInv = new FVector(N);
-
for (int i = 0; i < N; ++i) {
Feature f =
Feature.lexReadFeature(in, this, previousClass, previousPackage,
previousClassifier, previousSIdentifier, previousBSIdentifier);
int index = in.readInt();
lexiconInv.set(index, f);
-
previousClass = f.getClass();
previousPackage = f.getPackage();
previousClassifier = f.getGeneratingClassifier();
@@ -817,7 +846,7 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
else if (f.hasByteStringIdentifier())
previousBSIdentifier = f.getByteStringIdentifier();
}
-
+
if (readCounts) {
featureCounts = new IVector();
featureCounts.read(in);
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
index 106bb475..e0abd3ae 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
@@ -16,6 +16,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LinearThresholdUnitOptimizer;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -57,6 +58,7 @@
*
* @author Nick Rizzolo
**/
+@SuppressWarnings("serial")
public abstract class LinearThresholdUnit extends Learner {
/** Default for {@link #initialWeight}. */
public static final double defaultInitialWeight = 0;
@@ -68,6 +70,8 @@ public abstract class LinearThresholdUnit extends Learner {
public static final double defaultLearningRate = 0.1;
/** Default for {@link #weightVector}. */
public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+ /** Any weight whose magnitude is less than this is considered irrelevant. This is for pruning. */
+ public static final double defaultFeaturePruningThreshold = 0.000001;
/**
* The rate at which weights are updated; default {@link #defaultLearningRate}.
@@ -100,6 +104,8 @@ public abstract class LinearThresholdUnit extends Learner {
protected double negativeThickness;
/** The label producing classifier's allowable values. */
protected String[] allowableValues;
+ /** Weights whose magnitude falls below this threshold are pruned. */
+ public double featurePruningThreshold;
/**
* Default constructor. The learning rate and threshold take default values, while the name of
@@ -159,6 +165,21 @@ public LinearThresholdUnit(double r, double t, double pt, double nt) {
this("", r, t, pt, nt);
}
+ /**
+ * Use this constructor to fit a thick separator, where the positive and negative sides of the
+ * hyperplane will be given the specified separate thicknesses, while the name of the classifier
+ * gets the empty string.
+ *
+ * @param r The desired learning rate value.
+ * @param t The desired threshold value.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
+ */
+ public LinearThresholdUnit(double r, double t, double pt, double nt, double fpt) {
+ this("", r, t, pt, nt, fpt);
+ }
+
/**
* Initializing constructor. Sets the threshold, positive thickness, and negative thickness to
* their default values.
@@ -217,7 +238,21 @@ protected LinearThresholdUnit(String n, double r, double t, double pt) {
* @param nt The desired negative thickness.
**/
protected LinearThresholdUnit(String n, double r, double t, double pt, double nt) {
- this(n, r, t, pt, nt, (SparseWeightVector) defaultWeightVector.clone());
+ this(n, r, t, pt, nt, defaultFeaturePruningThreshold);
+ }
+
+ /**
+ * Takes the rate, threshold, positive thickness, negative thickness, and an initial weight vector.
+ *
+ * @param n The name of the classifier.
+ * @param r The desired learning rate.
+ * @param t The desired value for the threshold.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param v An initial weight vector.
+ **/
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, SparseWeightVector v) {
+ this(n, r, t, pt, nt, defaultFeaturePruningThreshold, v);
}
/**
@@ -229,9 +264,25 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt
* @param t The desired value for the threshold.
* @param pt The desired positive thickness.
* @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
+ **/
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt) {
+ this(n, r, t, pt, nt, fpt, (SparseWeightVector) defaultWeightVector.clone());
+ }
+
+ /**
+ * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to
+ * the specified values.
+ *
+ * @param n The name of the classifier.
+ * @param r The desired learning rate.
+ * @param t The desired value for the threshold.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
* @param v An initial weight vector.
**/
- protected LinearThresholdUnit(String n, double r, double t, double pt, double nt,
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt,
SparseWeightVector v) {
super(n);
Parameters p = new Parameters();
@@ -240,6 +291,7 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt
p.learningRate = r;
p.positiveThickness = pt;
p.negativeThickness = nt;
+ p.featurePruningThreshold = fpt;
setParameters(p);
}
@@ -291,6 +343,7 @@ public void setParameters(Parameters p) {
bias = p.initialWeight;
positiveThickness = p.thickness + p.positiveThickness;
negativeThickness = p.thickness + p.negativeThickness;
+ featurePruningThreshold = p.featurePruningThreshold;
}
/**
@@ -307,6 +360,7 @@ public Learner.Parameters getParameters() {
p.threshold = threshold;
p.positiveThickness = positiveThickness;
p.negativeThickness = negativeThickness;
+ p.featurePruningThreshold = featurePruningThreshold;
return p;
}
@@ -531,6 +585,15 @@ public void initialize(int numExamples, int numFeatures) {
}
+ /**
+ * When training is complete, optimize the feature set by discarding low value
+ * weights.
+ */
+ public void doneTraining() {
+ super.doneTraining();
+ LinearThresholdUnitOptimizer ltuo = new LinearThresholdUnitOptimizer(this);
+ ltuo.optimize();
+ }
/**
* An LTU returns two scores; one for the negative classification and one for the positive
* classification. By default, the score for the positive classification is the result of
@@ -751,7 +814,8 @@ public static class Parameters extends Learner.Parameters {
public double positiveThickness;
/** The thickness of the hyperplane on the negative side; default 0. */
public double negativeThickness;
-
+ /** Weights whose magnitude falls below this threshold are pruned. */
+ public double featurePruningThreshold;
/** Sets all the default values. */
public Parameters() {
@@ -760,6 +824,7 @@ public Parameters() {
initialWeight = defaultInitialWeight;
threshold = defaultThreshold;
thickness = defaultThickness;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -781,6 +846,7 @@ public Parameters(Parameters p) {
thickness = p.thickness;
positiveThickness = p.positiveThickness;
negativeThickness = p.negativeThickness;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -801,7 +867,6 @@ public void setParameters(Learner l) {
**/
public String nonDefaultString() {
String result = super.nonDefaultString();
-
if (learningRate != LinearThresholdUnit.defaultLearningRate)
result += ", learningRate = " + learningRate;
if (initialWeight != LinearThresholdUnit.defaultInitialWeight)
@@ -814,10 +879,22 @@ public String nonDefaultString() {
result += ", positiveThickness = " + positiveThickness;
if (negativeThickness != 0)
result += ", negativeThickness = " + negativeThickness;
-
+ if (featurePruningThreshold != LinearThresholdUnit.defaultFeaturePruningThreshold)
+ result += ", featurePruningThreshold = " + featurePruningThreshold;
if (result.startsWith(", "))
result = result.substring(2);
return result;
}
}
+
+
+ /**
+ * Given the index of the weights to prune, discard them, then shrink the weight vector down
+ * to save memory.
+ * @param uselessfeatures the features being pruned.
+ * @param numberFeatures the total number of features before pruning.
+ */
+ public void pruneWeights(int[] uselessfeatures, int numberFeatures) {
+ this.getWeightVector().pruneWeights(uselessfeatures, numberFeatures);
+ }
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java
new file mode 100644
index 00000000..e25c5b5d
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java
@@ -0,0 +1,447 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn;
+
+import java.io.PrintStream;
+import java.util.Arrays;
+import java.util.Random;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
+import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature;
+import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer;
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.SimpleNNTrainer;
+
+/**
+ * This class will leverage the Neural Net implementation {@link edu.illinois.cs.cogcomp.lbjava.neuralnet.NeuralNetwork}
+ * to allow creation and use of a backprop neural net implementation including momentum, bias, and back propagation
+ * for learning. There is a threaded learner that works quite well ONLY where there are a large number of weights
+ * between layers.
+ *
+ * This class is really just a wrapper for a SimpleNNTrainer which does all the work of training.
+ * @author redman
+ */
+public class NeuralNetLearner extends Learner {
+
+ /** computed */
+ private static final long serialVersionUID = -3369861028861092661L;
+
+ /** the parameters for learning and stuff. */
+ private Parameters parameters = new Parameters();
+
+ /** This is the object that will train the neural net. It uses its own
+ * internal mechanism and data representation for efficiency. */
+ private SimpleNNTrainer trainer = null;
+
+ /**
+ * our props include not only number of rounds (epochs), also a learning rate and momentum.
+ * @author redman
+ */
+ public static class Parameters extends Learner.Parameters {
+ /** default */
+ private static final long serialVersionUID = 1L;
+
+ /** the learning rate. */
+ public float learningRate = 0.5f;
+
+ /** the momentum value. */
+ public float momentum = 0.5f;
+
+ /** the random number generator seed. */
+ public int seed = -1;
+
+ /** the number of inputs */
+ public int inputCount = 0;
+
+ /** the number of outputs */
+ public int outputCount = 1;
+
+ /** the number of outputs from the single hidden layer */
+ public int hiddenCount = 100;
+
+ /** the layers of the neural network. */
+ private Layer[] layers;
+
+ /**
+ * Copy properties from the provided properties.
+ * @param p the props to copy.
+ */
+ public Parameters(Parameters p) {
+ this.learningRate = p.learningRate;
+ this.momentum = p.momentum;
+ this.seed = p.seed;
+ this.inputCount = p.inputCount;
+ this.outputCount = p.outputCount;
+ this.hiddenCount = p.hiddenCount;
+ }
+ /**
+ * Default constructor; initializes all parameters to their
+ * default values.
+ */
+ public Parameters() {
+ this.learningRate = 0.5f;
+ this.momentum = 0.5f;
+ this.seed = -1;
+ this.inputCount = 0;
+ this.hiddenCount = 100;
+ this.outputCount = 1;
+ }
+
+ }
+
+ /** used to store inputs so we don't realloc these arrays over and over. This is an optimization
+ * only possible because we know this class is not multithreaded. */
+ private float inputs[] = null;
+
+ /** used to store outputs so we don't realloc these arrays over and over. This is an optimization
+ * only possible because we know this class is not multithreaded. */
+ private float outputs[] = null;
+
+ /** number of neurons in each layer, including input and output layers.*/
+ private int[] layerSizes = null;
+
+ /**
+ * Default constructor. Creates a learner named "Howdy" and allocates a
+ * three-entry layer-size array (input, hidden, output); the actual sizes
+ * are filled in later from the parameters (see forget()).
+ * NOTE(review): the previous javadoc described a layerSizes argument that
+ * this constructor does not take; it appears to have been copied from the
+ * array-based constructor below.
+ */
+ public NeuralNetLearner () {
+ super("Howdy");
+ this.layerSizes = new int[3];
+ }
+
+ /**
+ * given arguments for initialization parameters.
+ * @param p the parameters.
+ */
+ public NeuralNetLearner(Parameters p) {
+ super("Howdy");
+ this.parameters = p;
+ }
+
+ /**
+ * The learning rate takes the default value.
+ * @param n The name of the classifier.
+ */
+ public NeuralNetLearner(String n) {
+ super(n);
+ }
+
+ /**
+ * Init the neural network learner by providing array with number of neurons in each layer, including
+ * the input layer. The caller will need to determine the number of inputs, the number of outputs and the number
+ * of hidden layers, and the neurons in that layer. The first index in the layerSizes indicates the number of inputs,
+ * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number
+ * of neurons.
+ * @param layerSizes the number of neurons in each layer.
+ */
+ public NeuralNetLearner (int[] layerSizes, Parameters p, boolean training) {
+ super("Howdy");
+ parameters = p;
+ parameters.layers = new Layer[layerSizes.length-1];
+ this.layerSizes = layerSizes;
+ this.forget();
+ }
+
+ /**
+ * Resets the learned model by rebuilding the network layers from the configured layer sizes.
+ */
+ public void forget() {
+ super.forget();
+ if (this.getInputCount() != -1) {
+ this.layerSizes = new int[3];
+ this.layerSizes[0] = this.getInputCount();
+ this.layerSizes[1] = this.getHiddenCount();
+ this.layerSizes[2] = this.getOutputCount();
+ parameters.layers = new Layer[layerSizes.length-1];
+ Layer[] l = this.parameters.layers;
+ Random r = new Random (1234);
+ for (int i = 0; i < layerSizes.length-1; i++) {
+ l[i] = new Layer(layerSizes[i], layerSizes[i+1], r);
+ }
+ inputs = new float[l[0].getNumberInputs()];
+ outputs = new float[l[l.length-1].getNumberOutputs()];
+ trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum);
+ }
+ }
+
+ /**
+ * Returns a string describing the output feature type of this classifier.
+ * @return "real"
+ **/
+ public String getOutputType() {
+ return "real";
+ }
+ /**
+ * Writes the learned function's internal representation in binary form.
+ * @param out The output stream.
+
+ public void write(ExceptionlessOutputStream out) {
+ super.write(out);
+ out.writeFloat(this.parameters.learningRate);
+ out.writeFloat(this.parameters.momentum);
+ out.writeInt(this.parameters.rounds);
+ if (this.layerSizes == null)
+ out.writeInt(0);
+ else {
+ out.writeInt(this.layerSizes.length);
+ for (int neurons : this.layerSizes)
+ out.writeInt(neurons);
+ for (Layer l : this.parameters.layers) {
+ l.write(out);
+ }
+ }
+ }
+
+ /**
+ * Reads the binary representation of a learner with this object's run-time type, overwriting
+ * any and all learned or manually specified parameters as well as the label lexicon but without
+ * modifying the feature lexicon.
+ * @param in The input stream. NOTE(review): this comment block is never closed, so the read() method below is effectively commented out -- confirm this is intentional.
+
+ public void read(ExceptionlessInputStream in) {
+ super.read(in);
+ this.parameters.learningRate = in.readFloat();
+ this.parameters.momentum = in.readFloat();
+ this.parameters.rounds = in.readInt();
+ int layers = in.readInt();
+ if (layers != 0) {
+ int[] szs = new int[layers];
+ for (int i = 0 ; i < szs.length; i++)
+ szs[i] = in.readInt();
+ this.layerSizes = szs;
+ Random r = new Random (1234);
+ for (int i = 0; i < layerSizes.length-1; i++) {
+ this.parameters.layers[i] = new Layer(layerSizes[i], layerSizes[i+1], r);
+ }
+ trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum);
+ for (Layer l : this.parameters.layers) {
+ l.read(in);
+ }
+ }
+ }
+
+
+ /**
+ * Populate the input and output vectors with the values for only those
+ * features that are represented.
+ */
+ final private void populateNNVector(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
+ double[] labelValues) {
+ Arrays.fill(inputs,0.0f);
+ Arrays.fill(outputs,0.0f);
+ for (int i = 0; i < exampleFeatures.length; i++)
+ inputs[exampleFeatures[i]] = (float)exampleValues[i];
+ if (exampleLabels != null)
+ for (int i = 0; i < exampleLabels.length; i++)
+ outputs[exampleLabels[i]] = (float)labelValues[i];
+
+ }
+
+ /**
+ * Trains the learning algorithm given an object as an example.
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @param exampleLabels The example's label(s).
+ * @param labelValues The labels' values.
+ **/
+ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
+ double[] labelValues) {
+ this.populateNNVector(exampleFeatures, exampleValues, exampleLabels, labelValues);
+ this.trainer.train(inputs, outputs);
+ }
+
+ /**
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return null
+ **/
+ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
+ return null;
+ }
+
+ /**
+ * Returns the classification of the given example as a single feature instead of a
+ * {@link FeatureVector}.
+ * @param f The features array.
+ * @param v The values array.
+ * @return The classification of the example as a feature.
+ **/
+ public Feature featureValue(int[] f, double[] v) {
+ this.populateNNVector(f, v, null, null);
+
+ // this returns the activation energies for ALL layers, we only want the output layer
+ float[][] results = this.trainer.activate(inputs);
+
+ // the last vector contains the score, this is the output of the last layer.
+ return new RealPrimitiveStringFeature(containingPackage, name, "", results [results.length-1][0]);
+ }
+
+ /**
+ * Simply computes the dot product of the weight vector and the example
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return The computed real value.
+ **/
+ public double realValue(int[] exampleFeatures, double[] exampleValues) {
+ this.populateNNVector(exampleFeatures, exampleValues, null, null);
+ return (double) this.trainer.activate(inputs)[0][0];
+ }
+
+ /**
+ * Simply computes the dot product of the weight vector and the feature vector extracted from
+ * the example object.
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return The computed feature (in a vector).
+ **/
+ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
+ return new FeatureVector(featureValue(exampleFeatures, exampleValues));
+ }
+
+ /**
+ * Writes the algorithm's internal representation as text. In the first line of output, the name
+ * of the classifier is printed, followed by the learning rate, momentum, and number of rounds.
+ * @param out The output stream.
+ */
+ public void write(PrintStream out) {
+ out.println(name + ": " + this.parameters.learningRate + ", " + this.parameters.momentum + ", " + this.parameters.rounds);
+ for (Layer l : this.parameters.layers) {
+ l.write(out);
+ }
+ }
+
+ /**
+ * Returns a deep clone of this learning algorithm.
+ * TODO
+ */
+ public Object clone() {
+ NeuralNetLearner clone = null;
+ try {
+ clone = (NeuralNetLearner) super.clone();
+ } catch (Exception e) {
+ System.err.println("Error cloning NeuralNetLearner: " + e);
+ System.exit(1);
+ }
+ return clone;
+ }
+
+ /**
+ * @return the seed to seed all random number gen.
+ */
+ public int getSeed() {
+ return this.parameters.seed;
+ }
+
+ /**
+ * @param seed the seed to set
+ */
+ public void setSeed(int seed) {
+ this.parameters.seed = seed;
+ }
+
+ /**
+ * @return the number of total inputs
+ */
+ public int getInputCount() {
+ return this.parameters.inputCount;
+ }
+
+ /**
+ * @param inputCount the inputCount to set
+ */
+ public void setInputCount(int inputCount) {
+ this.parameters.inputCount = inputCount;
+ }
+
+ /**
+ * @return the outputCount
+ */
+ public int getOutputCount() {
+ return this.parameters.outputCount;
+ }
+
+ /**
+ * @param outputCount the outputCount to set
+ */
+ public void setOutputCount(int outputCount) {
+ this.parameters.outputCount = outputCount;
+ }
+
+ /**
+ * @return the hiddenCount
+ */
+ public int getHiddenCount() {
+ return this.parameters.hiddenCount;
+ }
+
+ /**
+ * @param hiddenCount the hiddenCount to set
+ */
+ public void setHiddenCount(int hiddenCount) {
+ this.parameters.hiddenCount = hiddenCount;
+ }
+
+ /**
+ * @return the learning rate used to throttle the rate at which the weight parameters change.
+ */
+ public float getLearningRate() {
+ return parameters.learningRate;
+ }
+
+ /**
+ * set the learning rate at which the weight parameters change.
+ * @param learningRate the learning rate at which the weight parameters change.
+ */
+ public void setLearningRate(float learningRate) {
+ this.parameters.learningRate = learningRate;
+ }
+
+ public float getMomentum() {
+ return parameters.momentum;
+ }
+
+ /**
+ * set the value used to prevent convergence against local minimum.
+ * @param momentum used to prevent convergence against local minimum.
+ */
+ public void setMomentum(float momentum) {
+ this.parameters.momentum = momentum;
+ }
+
+ /**
+ * Get the number of epochs.
+ * @return number of epochs to train.
+ */
+ public int getEpochs() {
+ return parameters.rounds;
+ }
+
+ /**
+ * set the number of training iterations. More should yield better results, until overfit.
+ * @param epochs the number of training iterations.
+ */
+ public void setEpochs(int epochs) {
+ this.parameters.rounds = epochs;
+ }
+
+ /**
+ * Retrieves the parameters that are set in this learner.
+ * @return An object containing all the values of the parameters that control the behavior of
+ * this learning algorithm.
+ **/
+ public Learner.Parameters getParameters() {
+ return parameters;
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
index c9b74899..4e76223a 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
@@ -61,6 +61,15 @@ public class SparseAveragedPerceptron extends SparsePerceptron {
* {@link SparseAveragedPerceptron.AveragedWeightVector}.
**/
protected AveragedWeightVector awv;
+
+ /**
+ * @return the averaged weight vector
+ */
+ public AveragedWeightVector getAveragedWeightVector() {
+ return awv;
+ }
+
+
/** Keeps the extra information necessary to compute the averaged bias. */
protected double averagedBias;
@@ -729,5 +738,46 @@ public Object clone() {
public SparseWeightVector emptyClone() {
return new AveragedWeightVector();
}
+
+ /**
+ * If we prune worthless weights, we must also prune useless averages.
+ * @param uselessfeatures useless features.
+ * @param numfeatures since this weight vec does not know how many features there are, it must be passed in
+ */
+ public void pruneWeights(int[] uselessfeatures, int numfeatures) {
+ if (uselessfeatures.length == 0)
+ return;
+ super.pruneWeights(uselessfeatures, numfeatures);
+
+ // create a new smaller weight vector for the pruned weights.
+ int oldsize = this.averagedWeights.size();
+ if (oldsize > numfeatures) {
+ throw new RuntimeException("There was an averaged weight vector with more weights("+oldsize+
+ ") than the number of features("+numfeatures+")!");
+ }
+ int newsize = numfeatures - uselessfeatures.length;
+ double [] newvec = new double[newsize];
+
+ // copy the weights from the old vector.
+ int uselessindex = 0;
+ int newvecindex = 0;
+ for (int oldindex = 0; oldindex < oldsize; oldindex++) {
+ if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) {
+ // this is a useless feature, we will skip it.
+ uselessindex++;
+ } else {
+ newvec[newvecindex] = averagedWeights.get(oldindex);
+ newvecindex++;
+ }
+ }
+
+ // compress the array.
+ if (newvecindex != newsize) {
+ double[] tmp = new double[newvecindex];
+ System.arraycopy(newvec, 0, tmp, 0, newvecindex);;
+ newvec = tmp;
+ }
+ this.averagedWeights = new DVector(newvec);
+ }
}
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
index b63d0b0f..822fc1fd 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
@@ -10,6 +10,7 @@
import java.io.PrintStream;
import java.util.Collection;
import java.util.Iterator;
+import java.util.Map.Entry;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream;
@@ -19,6 +20,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SparseNetworkOptimizer;
/**
* A SparseNetworkLearner uses multiple {@link LinearThresholdUnit}s to make a
@@ -44,7 +46,6 @@
public class SparseNetworkLearner extends Learner {
private static final long serialVersionUID = 1L;
-
/** Default for {@link #baseLTU}. */
public static final LinearThresholdUnit defaultBaseLTU = new SparseAveragedPerceptron();
@@ -70,7 +71,6 @@ public class SparseNetworkLearner extends Learner {
/** Whether or not this learner's labeler produces conjunctive features. */
protected boolean conjunctiveLabels;
-
/**
* Instantiates this multi-class learner with the default learning algorithm:
* {@link #defaultBaseLTU}.
@@ -185,7 +185,6 @@ public void setParameters(Parameters p) {
setLTU(p.baseLTU);
}
-
/**
* Retrieves the parameters that are set in this learner.
*
@@ -198,7 +197,6 @@ public Learner.Parameters getParameters() {
return p;
}
-
/**
* Sets the baseLTU variable. This method will not have any effect on the
* LTUs that already exist in the network. However, new LTUs created after this method is
@@ -211,7 +209,6 @@ public void setLTU(LinearThresholdUnit ltu) {
baseLTU.name = name + "$baseLTU";
}
-
/**
* Sets the labeler.
*
@@ -229,7 +226,6 @@ public void setLabeler(Classifier l) {
super.setLabeler(l);
}
-
/**
* Sets the extractor.
*
@@ -255,7 +251,6 @@ public void setNetworkLabel(int label) {
network.set(label, ltu);
}
-
/**
* Each example is treated as a positive example for the linear threshold unit associated with
* the label's value that is active for the example and as a negative example for all other
@@ -290,7 +285,17 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa
ltu.learn(exampleFeatures, exampleValues, l, labelValues);
}
}
-
+
+ /**
+ * When we complete learning, we will do an optimization.
+ */
+ public void doneTraining() {
+ super.doneTraining();
+
+ // do the optimization
+ SparseNetworkOptimizer optimizer = new SparseNetworkOptimizer(this);
+ optimizer.optimize();
+ }
/** Simply calls doneLearning() on every LTU in the network. */
public void doneLearning() {
@@ -304,14 +309,12 @@ public void doneLearning() {
}
}
-
/** Sets the number of examples and features. */
public void initialize(int ne, int nf) {
numExamples = ne;
numFeatures = nf;
}
-
/**
* Simply calls {@link LinearThresholdUnit#doneWithRound()} on every LTU in the network.
*/
@@ -326,14 +329,12 @@ public void doneWithRound() {
}
}
-
/** Clears the network. */
public void forget() {
super.forget();
network = new OVector();
}
-
/**
* Returns scores for only those labels in the given collection. If the given collection is
* empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit}
@@ -352,7 +353,6 @@ public ScoreSet scores(Object example, Collection candidates) {
return scores((int[]) exampleArray[0], (double[]) exampleArray[1], candidates);
}
-
/**
* Returns scores for only those labels in the given collection. If the given collection is
* empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit}
@@ -404,7 +404,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, Collection
return result;
}
-
/**
* This method is a surrogate for {@link #scores(int[],double[],Collection)} when the labeler is
* known to produce conjunctive features. It is necessary because when given a string label from
@@ -438,7 +437,6 @@ protected ScoreSet conjunctiveScores(int[] exampleFeatures, double[] exampleValu
return result;
}
-
/**
* Produces a set of scores indicating the degree to which each possible discrete classification
* value is associated with the given example object. These scores are just the scores of each
@@ -465,7 +463,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
return result;
}
-
/**
* Returns the classification of the given example as a single feature instead of a
* {@link FeatureVector}.
@@ -494,7 +491,6 @@ public Feature featureValue(int[] f, double[] v) {
return bestValue == -1 ? null : predictions.get(bestValue);
}
-
/**
* This implementation uses a winner-take-all comparison of the outputs from the individual
* linear threshold units' score methods.
@@ -507,7 +503,6 @@ public String discreteValue(int[] exampleFeatures, double[] exampleValues) {
return featureValue(exampleFeatures, exampleValues).getStringValue();
}
-
/**
* This implementation uses a winner-take-all comparison of the outputs from the individual
* linear threshold units' score methods.
@@ -520,7 +515,6 @@ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
return new FeatureVector(featureValue(exampleFeatures, exampleValues));
}
-
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -535,7 +529,6 @@ public Feature valueOf(Object example, Collection candidates) {
return valueOf((int[]) exampleArray[0], (double[]) exampleArray[1], candidates);
}
-
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -596,7 +589,6 @@ public Feature valueOf(int[] exampleFeatures, double[] exampleValues, Collection
return predictions.get(bestValue);
}
-
/**
* This method is a surrogate for {@link #valueOf(int[],double[],Collection)} when the labeler
* is known to produce conjunctive features. It is necessary because when given a string label
@@ -634,7 +626,6 @@ protected Feature conjunctiveValueOf(int[] exampleFeatures, double[] exampleValu
return predictions.get(bestValue);
}
-
/**
* Writes the algorithm's internal representation as text.
*
@@ -659,7 +650,6 @@ public void write(PrintStream out) {
out.close();
}
-
/**
* Writes the learned function's internal representation in binary form.
*
@@ -682,7 +672,6 @@ public void write(ExceptionlessOutputStream out) {
out.close();
}
-
/**
* Reads the binary representation of a learner with this object's run-time type, overwriting
* any and all learned or manually specified parameters as well as the label lexicon but without
@@ -700,7 +689,6 @@ public void read(ExceptionlessInputStream in) {
network.add(Learner.readLearner(in));
}
-
/** Returns a deep clone of this learning algorithm. */
public Object clone() {
SparseNetworkLearner clone = null;
@@ -727,7 +715,6 @@ public Object clone() {
return clone;
}
-
/**
* Simply a container for all of {@link SparseNetworkLearner}'s configurable parameters. Using
* instances of this class should make code more readable and constructors less complicated.
@@ -743,13 +730,11 @@ public static class Parameters extends Learner.Parameters {
**/
public LinearThresholdUnit baseLTU;
-
/** Sets all the default values. */
public Parameters() {
baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone();
}
-
/**
* Sets the parameters from the parent's parameters object, giving defaults to all
* parameters declared in this object.
@@ -759,14 +744,12 @@ public Parameters(Learner.Parameters p) {
baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone();
}
-
/** Copy constructor. */
public Parameters(Parameters p) {
super(p);
baseLTU = p.baseLTU;
}
-
/**
* Calls the appropriate Learner.setParameters(Parameters) method for this
* Parameters object.
@@ -777,7 +760,6 @@ public void setParameters(Learner l) {
((SparseNetworkLearner) l).setParameters(this);
}
-
/**
* Creates a string representation of these parameters in which only those parameters that
* differ from their default values are mentioned.
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
index defe1001..0353daba 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
@@ -29,7 +29,9 @@
* @author Nick Rizzolo
**/
public class SparseWeightVector implements Cloneable, Serializable {
- /**
+ /** default. */
+ private static final long serialVersionUID = 1L;
+ /**
* When a feature appears in an example but not in this vector, it is assumed to have this
* weight.
**/
@@ -40,7 +42,7 @@ public class SparseWeightVector implements Cloneable, Serializable {
/** The weights in the vector indexed by their {@link Lexicon} key. */
protected DVector weights;
- /** Simply instantiates {@link #weights}. */
+ /** Simply instantiates {@link #weights}. */
public SparseWeightVector() {
this(new DVector(defaultCapacity));
}
@@ -106,6 +108,13 @@ public void setWeight(int featureIndex, double w, double defaultW) {
weights.set(featureIndex, w, defaultW);
}
+ /**
+ * For those cases where we need the raw weights (during model optimization).
+ * @return the raw, unmodified weights.
+ */
+ public DVector getRawWeights() {
+ return weights;
+ }
/**
* Takes the dot product of this SparseWeightVector with the argument vector, using
@@ -317,7 +326,8 @@ public void toStringJustWeights(PrintStream out) {
* @param min Sets the minimum width for the textual representation of all features.
* @param lex The feature lexicon.
**/
- public void toStringJustWeights(PrintStream out, int min, Lexicon lex) {
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ public void toStringJustWeights(PrintStream out, int min, Lexicon lex) {
Map map = lex.getMap();
Map.Entry[] entries = (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]);
Arrays.sort(entries, new Comparator() {
@@ -420,7 +430,8 @@ public static SparseWeightVector readWeightVector(ExceptionlessInputStream in) {
String name = in.readString();
if (name == null)
return null;
- Class c = ClassUtils.getClass(name);
+ @SuppressWarnings("rawtypes")
+ Class c = ClassUtils.getClass(name);
SparseWeightVector result = null;
try {
@@ -482,4 +493,44 @@ public Object clone() {
public SparseWeightVector emptyClone() {
return new SparseWeightVector();
}
+
+ /**
+ * delete all irrelevant feature weights.
+ * @param uselessfeatures useless features.
+ * @param numfeatures since this weight vec does not know how many features there are, it must be passed in
+ */
+ public void pruneWeights(int[] uselessfeatures, int numfeatures) {
+ if (uselessfeatures.length == 0)
+ return;
+
+ // create a new smaller weight vector for the pruned weights.
+ int oldsize = weights.size();
+ if (oldsize > numfeatures) {
+ throw new RuntimeException("There was a weight vector with more weights("+oldsize+
+ ") than the number of features("+numfeatures+")!");
+ }
+ int newsize = numfeatures - uselessfeatures.length;
+ double [] newvec = new double[newsize];
+
+ // copy the weights from the old vector.
+ int uselessindex = 0;
+ int newvecindex = 0;
+ for (int oldindex = 0; oldindex < oldsize; oldindex++) {
+ if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) {
+ // this is a useless feature, we will skip it.
+ uselessindex++;
+ } else {
+ newvec[newvecindex] = weights.get(oldindex);
+ newvecindex++;
+ }
+ }
+
+ // compress the array.
+ if (newvecindex != newsize) {
+ double[] tmp = new double[newvecindex];
+ System.arraycopy(newvec, 0, tmp, 0, newvecindex);;
+ newvec = tmp;
+ }
+ this.weights = new DVector(newvec);
+ }
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
index aec40db5..2c930b57 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
@@ -24,6 +24,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SupportVectorMachineOptimizer;
import edu.illinois.cs.cogcomp.lbjava.util.ByteString;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -64,6 +65,10 @@
* @author Michael Paul
**/
public class SupportVectorMachine extends Learner {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
/** Default for {@link #solverType}. */
public static final String defaultSolverType = "L2LOSS_SVM";
/** Default for {@link #C}. */
@@ -72,6 +77,11 @@ public class SupportVectorMachine extends Learner {
public static final double defaultEpsilon = 0.1;
/** Default for {@link #bias}. */
public static final double defaultBias = 1.0;
+ /** any weight less than this is considered irrelevant. This is for pruning. */
+ public static final double defaultFeaturePruningThreshold = 0.000001;
+
+ /** feature pruning threshold caps magnitude of useful features. */
+ public double featurePruningThreshold;
/**
* Keeps track of whether the doneLearning() warning message has been printed.
@@ -114,6 +124,14 @@ public class SupportVectorMachine extends Learner {
/** The number of bias features; there are either 0 or 1 of them. */
protected int biasFeatures;
+ /**
+ * @return the biasFeatures
+ */
+ public int getBiasFeatures() {
+ return biasFeatures;
+ }
+
+
/** Controls if liblinear-related messages are output */
protected boolean displayLL = false;
@@ -210,6 +228,20 @@ public SupportVectorMachine(double c, double e, double b, String s, boolean d) {
this("", c, e, b, s, d);
}
+ /**
+ * Initializing constructor. The name of the classifier gets the empty string.
+ *
+ * @param c The desired C value.
+ * @param e The desired epsilon value.
+ * @param b The desired bias.
+ * @param s The solver type.
+ * @param d Toggles if the liblinear-related output should be displayed.
+ * @param fpt the feature pruning threshold.
+ **/
+ public SupportVectorMachine(double c, double e, double b, String s, boolean d, double fpt) {
+ this("", c, e, b, s, d, fpt);
+ }
+
/**
* Initializing constructor. C, epsilon, the bias, and the solver type take the default values.
*
@@ -276,6 +308,20 @@ public SupportVectorMachine(String n, double c, double e, double b, String s) {
* @param d Toggles if the liblinear-related output should be displayed.
**/
public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d) {
+ this(n, c, e, b, s, d, SupportVectorMachine.defaultFeaturePruningThreshold);
+ }
+
+ /**
+ * Initializing constructor.
+ *
+ * @param n The name of the classifier.
+ * @param c The desired C value.
+ * @param e The desired epsilon value.
+ * @param b The desired bias.
+ * @param s The solver type.
+ * @param d Toggles if the liblinear-related output should be displayed.
+ **/
+ public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d, double fpt) {
super(n);
newLabelLexicon = labelLexicon;
Parameters p = new Parameters();
@@ -284,10 +330,12 @@ public SupportVectorMachine(String n, double c, double e, double b, String s, bo
p.bias = b;
p.solverType = s;
p.displayLL = d;
+ p.featurePruningThreshold = fpt;
allowableValues = new String[0];
setParameters(p);
}
+
/**
* Initializing constructor. Sets all member variables to their associated settings in the
* {@link SupportVectorMachine.Parameters} object. The name of the classifier gets the empty
@@ -317,9 +365,23 @@ public double[] getWeights() {
return weights;
}
+ /**
+ * @return the numFeatures
+ */
+ public int getNumFeatures() {
+ return numFeatures;
+ }
+
public int getNumClasses() {
return numClasses;
}
+
+ /**
+ * @return the solverType
+ */
+ public String getSolverType() {
+ return solverType;
+ }
/**
* Sets the values of parameters that control the behavior of this learning algorithm.
@@ -333,6 +395,7 @@ public void setParameters(Parameters p) {
biasFeatures = (bias >= 0) ? 1 : 0;
solverType = p.solverType;
displayLL = p.displayLL;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -349,6 +412,7 @@ public Learner.Parameters getParameters() {
p.bias = bias;
p.solverType = solverType;
p.displayLL = displayLL;
+ p.featurePruningThreshold = this.featurePruningThreshold;
return p;
}
@@ -403,6 +467,7 @@ public void initialize(int ne, int nf) {
* @param exampleLabels The example's array of label indices.
* @param labelValues The example's array of label values.
**/
+ @SuppressWarnings({ "unchecked", "rawtypes" })
public void learn(final int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
double[] labelValues) {
// Expand the size of the example arrays if they are full.
@@ -600,12 +665,22 @@ else if (newLabelLexicon.size() > labelLexicon.size()) {
weights = trainedModel.getFeatureWeights();
allExamples = null;
allLabels = null;
-
if (displayLL)
System.out.println(" Finished training at " + new Date());
}
+ /**
+ * Optimize the model by doing feature pruning, drop the low value weights.
+ */
+ public void doneTraining() {
+ super.doneTraining();
+
+ // optimize the resulting model by discarding low weight features.
+ SupportVectorMachineOptimizer svmo = new SupportVectorMachineOptimizer(this);
+ svmo.optimize();
+ }
+
/**
* Writes the algorithm's internal representation as text. In the first line of output, the name
* of the classifier is printed, followed by {@link #C}, {@link #epsilon}, {@link #bias}, and
@@ -895,7 +970,7 @@ public double score(int[] exampleFeatures, double[] exampleValues, int label) {
numClasses = 1;
label = 0;
}
-
+
for (int i = 0; i < exampleFeatures.length; i++) {
int f = exampleFeatures[i];
@@ -929,6 +1004,43 @@ public Feature valueOf(Object example, Collection candidates) {
}
+ /**
+ * Given the index of the weights to prune, discard them, then shrink the weight vector down
+ * to save memory.
+ * @param uselessfeatures the indices of the features being pruned.
+ * @param numberFeatures the total number of features before pruning.
+ */
+ public void pruneWeights(int[] uselessfeatures, int numberFeatures) {
+ int sz = numberFeatures - uselessfeatures.length;
+ double[] newweights = new double[sz+biasFeatures];
+ int nextToPrune = 0;
+ int newweightindex = 0;
+ for (int i = 0; i < weights.length; i++) {
+ if (nextToPrune < uselessfeatures.length && i == uselessfeatures[nextToPrune]) {
+ if (Math.abs(weights[i]) > this.featurePruningThreshold)
+ throw new IllegalArgumentException("Pruning a high value weight : "+weights[i]+" at "+i);
+ nextToPrune++;
+ } else {
+ if (newweightindex >= newweights.length)
+ throw new IllegalArgumentException("Attempted to overpopulate the new weight : indx="
+ +i+" features="+numberFeatures+" useless="+uselessfeatures.length);
+ newweights[newweightindex] = weights[i];
+ newweightindex++;
+ }
+ }
+
+ // do some sanity checks.
+ if (newweightindex != newweights.length)
+ throw new IllegalArgumentException("The new pruned weight vector was not fully populated!");
+ if (nextToPrune != uselessfeatures.length)
+ throw new IllegalArgumentException("Not all the prunable features were pruned!");
+
+ // all good, do the replacement.
+ System.out.println("SVM.pruneWeights: "+sz+" features, "+newweights.length+" weights size");
+ numFeatures = sz;
+ weights = newweights;
+ }
+
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -1062,6 +1174,14 @@ public static class Parameters extends Learner.Parameters {
*
**/
public String solverType;
+
+ /**
+ * @return the solverType
+ */
+ public String getSolverType() {
+ return solverType;
+ }
+
/**
* The cost parameter C; default {@link SupportVectorMachine#defaultC}
**/
@@ -1081,7 +1201,9 @@ public static class Parameters extends Learner.Parameters {
* false
**/
public boolean displayLL;
-
+
+ /** feature pruning threshold caps magnitude of useful features. */
+ public double featurePruningThreshold;
/** Sets all the default values. */
public Parameters() {
@@ -1090,6 +1212,7 @@ public Parameters() {
epsilon = defaultEpsilon;
bias = defaultBias;
displayLL = false;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -1104,6 +1227,7 @@ public Parameters(Learner.Parameters p) {
epsilon = defaultEpsilon;
bias = defaultBias;
displayLL = false;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -1115,6 +1239,7 @@ public Parameters(Parameters p) {
epsilon = p.epsilon;
bias = p.bias;
displayLL = p.displayLL;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -1168,6 +1293,8 @@ public String nonDefaultString() {
result += ", epsilon = " + epsilon;
if (bias != SupportVectorMachine.defaultBias)
result += ", bias = " + bias;
+ if (featurePruningThreshold != defaultFeaturePruningThreshold)
+ result += ", feature pruning threshold = " + featurePruningThreshold;
if (result.startsWith(", "))
result = result.substring(2);
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java
new file mode 100644
index 00000000..f6a68d15
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java
@@ -0,0 +1,181 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature;
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+
+/**
+ * This class defines the life cycle methods for pruning useless features
+ * from a lexicon. Features for example that carry zero weights with them are
+ * not useful to the model, so can be eliminated saving space and execution time, without
+ * affecting accuracy (much).
+ * @author redman
+ */
+abstract public class LexiconOptimizer {
+
+ /** any weight less than this is considered irrelevant. This is for pruning. */
+ private static final double PRUNING_THRESHOLD = 0.000001;
+
+ /** lexicon contains the features we will operate on. */
+ protected Lexicon lexicon;
+
+ /** this also for testing, save feature names we will delete, check the names when we do. */
+ final protected ArrayList uselessFeatureNames = new ArrayList();
+
+ /** this is the threshold we use to discard useless features. */
+ protected double threshold = PRUNING_THRESHOLD;
+
+ /**
+ * We must have a lexicon to perform this operation.
+ * @param lexicon the lexicon object.
+ * @param threshold the feature pruning threshold.
+ */
+ protected LexiconOptimizer(Lexicon lexicon, double threshold) {
+ this.lexicon = lexicon;
+ this.threshold = threshold;
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+ * @return true if the feature carries any weight of significant magnitude.
+ */
+ abstract protected boolean hasWeight(Lexicon lex, Feature f);
+
+ /**
+ * This method returns the number of features. This implementation assumes the
+ * lexicon is populated, but that's not always the case (with SVM for example appears
+ * to not always have a populated lexicon). In these cases, this method may be overridden.
+ * @return the number of features.
+ */
+ protected int getNumberFeatures() {
+ return lexicon.size();
+ }
+
+ /**
+ * do the optimization
+ */
+ public void optimize () {
+
+ int originalNumFeatures = this.getNumberFeatures();
+ int [] uselessfeatures = identifyUselessFeatures();
+ pruneWeights(uselessfeatures, originalNumFeatures);
+ pruneLexicon(uselessfeatures);
+
+ System.out.println("LexiconOptimizer optimization complete, pruned "
+ +uselessfeatures.length+" features of "+originalNumFeatures+", leaving "+(originalNumFeatures - uselessfeatures.length)+
+ " at threshold of "+threshold);
+ }
+
+ /**
+ * @param f the feature.
+ * @return true if the feature is conjunctive.
+ */
+ static private boolean isConjunctive(Feature f) {
+ return (f instanceof DiscreteConjunctiveFeature || f instanceof RealConjunctiveFeature);
+ }
+
+ /**
+ * If this conjunctive feature has weight, add it and all its children to the white list.
+ * @param lex the lexicon maps feature to index.
+ * @param whitelist the white list we will add to.
+ * @param f the conjunctive feature.
+ */
+ private void traverseConjunctiveTree(HashSet whitelist, Feature f) {
+
+ // add the conjunctive feature.
+ whitelist.add(f);
+
+ if (f instanceof DiscreteConjunctiveFeature) {
+
+ // add its direct children
+ DiscreteConjunctiveFeature dcf = (DiscreteConjunctiveFeature) f;
+ whitelist.add(dcf.getLeft());
+ whitelist.add(dcf.getRight());
+
+ // possibly add any children of children.
+ if (isConjunctive(dcf.getLeft()))
+ traverseConjunctiveTree(whitelist, dcf.getLeft());
+ if (isConjunctive(dcf.getRight()))
+ traverseConjunctiveTree(whitelist, dcf.getRight());
+ } else {
+
+ // add its direct children
+ RealConjunctiveFeature rcf = (RealConjunctiveFeature) f;
+ whitelist.add(rcf.getLeft());
+ whitelist.add(rcf.getRight());
+
+ // possibly add any children of children.
+ if (isConjunctive(rcf.getLeft()))
+ traverseConjunctiveTree(whitelist, rcf.getLeft());
+ if (isConjunctive(rcf.getRight()))
+ traverseConjunctiveTree(whitelist, rcf.getRight());
+ }
+ }
+
+ /**
+ * Find all features we must whitelist. For each conjunctive feature that has weight, we must keep
+ * all its children, regardless of weight, and the rest of the tree from there on down.
+ * @param lex the lexicon.
+ * @return the conjunctive features.
+ */
+ protected HashSet compileWhitelist(Lexicon lex) {
+ HashSet whitelist = new HashSet();
+ for (Object e : lex.getMap().entrySet()) {
+ @SuppressWarnings("unchecked")
+ Entry entry = (Entry) e;
+ Feature f = entry.getKey();
+ if (isConjunctive(f) && this.hasWeight(lex, f)) {
+
+ // add this conjunctive feature and all it's kids to the whitelist.
+ traverseConjunctiveTree(whitelist, f);
+ }
+ }
+ return whitelist;
+ }
+
+
+ /**
+ * Given a list of useless features, prune the entries from the lexicon.
+ * @param uselessfeatures
+ */
+ protected void pruneLexicon(int[] uselessfeatures) {
+ lexicon.discardPrunedFeatures(uselessfeatures);
+ for (Feature f : this.uselessFeatureNames) {
+ if (lexicon.contains(f)) {
+ throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier());
+ }
+ }
+ }
+
+ /**
+ * This method selects the features to be pruned. If weights
+ * are needed, they must be passed to the constructor and stored in fields of
+ * the implementing class. In this way, we make no assumptions about the
+ * structure of the weight classes.
+ * @return the indices of the features deemed useless.
+ */
+ abstract protected int[] identifyUselessFeatures();
+
+ /**
+ * Once we have identified the useless entries, we need to optimize the
+ * model components.
+ * @param uselessfeatures the indices of those features with no significant weights.
+ * @param originalNumFeatures the number of features in the original lexicon.
+ */
+ abstract public void pruneWeights(int[] uselessfeatures, int originalNumFeatures);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java
new file mode 100644
index 00000000..44101b33
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java
@@ -0,0 +1,144 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit;
+import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron;
+import gnu.trove.set.hash.TIntHashSet;
+
+/**
+ * This class will optimize any working LinearThresholdUnit subclass by pruning
+ * low value features.
+ * @author redman
+ */
+public class LinearThresholdUnitOptimizer extends LexiconOptimizer {
+
+ /** the LTU learner we want to optimize. */
+ private LinearThresholdUnit ltuLearner;
+
+    /** this is also for testing; save feature names we will delete, check the names when we do. */
+ final ArrayList uselessFeatureNames = new ArrayList();
+
+ /**
+ * Given the LTU learner to optimize.
+     * @param ltu the LTU learner.
+ */
+ public LinearThresholdUnitOptimizer(LinearThresholdUnit ltu) {
+ super(ltu.demandLexicon(), ltu.featurePruningThreshold);
+ ltuLearner = ltu;
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+     * @return true if the feature's summed weight magnitude exceeds the pruning threshold.
+ */
+ protected boolean hasWeight(Lexicon lex, Feature f) {
+ int featureindex = lex.lookup(f);
+
+ // we assume each element of the network is of the same type, if that type is sparse averaged
+ // perceptron, we check both the averaged and current weight
+ double sum;
+ if (this.ltuLearner instanceof SparseAveragedPerceptron) {
+ SparseAveragedPerceptron sap = (SparseAveragedPerceptron) this.ltuLearner;
+ double wt = sap.getWeightVector().getRawWeights().get(featureindex);
+ double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex);
+ sum = Math.abs(wt);
+ sum += Math.abs(avg);
+ } else {
+ double wt = this.ltuLearner.getWeightVector().getRawWeights().get(featureindex);
+ sum = Math.abs(wt);
+ }
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (sum > this.threshold)
+ return true;
+ else
+ return false;
+ }
+
+ /**
+ * In this case, we must check, for each feature, the associated set of weight in each weight
+ * vector, if they are all very small, it is useless. The array returned is sorted ascending.
+ * @return the set of useless features.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures()
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ protected int[] identifyUselessFeatures() {
+ Lexicon lex = this.ltuLearner.demandLexicon();
+ if (lex != null) {
+ HashSet whitelist = compileWhitelist(lex);
+
+ // we have the conjunctive features, if left, right, or the parent itself has a non zero weight,
+            // consider none of the features (parent, left or right) useless; whitelist them.
+ int count = 0;
+ int numberfeatures = lex.size();
+ int[] all = new int[numberfeatures];
+ TIntHashSet defunct = new TIntHashSet();
+ for (Object e : lex.getMap().entrySet()) {
+ Entry entry = (Entry) e;
+ int fi = entry.getValue();
+ if (!whitelist.contains(entry.getKey())) {
+ double wt = Math.abs(this.ltuLearner.getWeightVector().getRawWeights().get(fi));
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (wt < this.threshold) {
+
+ // This is a useless feature
+ all[count] = fi;
+ if (defunct.contains(fi)) {
+ System.err.println("There was a feature discarded twice during feature pruning!");
+ } else {
+ defunct.add(fi);
+ }
+ this.uselessFeatureNames.add(entry.getKey());
+ count++;
+ }
+ }
+ }
+
+ int[] useless = new int[count];
+ System.arraycopy(all, 0, useless, 0, count);
+ Arrays.sort(useless);
+ return useless;
+ } else
+ return new int[0];
+ }
+
+ /**
+ * Check it out when done, make sure it worked.
+ */
+ protected void pruneLexicon(int[] uselessfeatures) {
+ super.pruneLexicon(uselessfeatures);
+ for (Feature f : this.uselessFeatureNames) {
+ if (lexicon.contains(f)) {
+ throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier());
+ }
+ }
+ }
+
+ /**
+     * Now we remove the useless weights from ALL weight vectors. There must be the same number
+ * of entries in each weight vector as there is in the lexicon.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[])
+ */
+ @Override
+ public void pruneWeights(int[] uselessfeatures, int origNumFeatures) {
+ this.ltuLearner.pruneWeights(uselessfeatures, origNumFeatures);
+ }
+}
\ No newline at end of file
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java
new file mode 100644
index 00000000..45cfe812
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java
@@ -0,0 +1,149 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector;
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit;
+import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron;
+import edu.illinois.cs.cogcomp.lbjava.learn.SparseNetworkLearner;
+import gnu.trove.set.hash.TIntHashSet;
+
+/**
+ * This class will optimize the SparseNetworkLearner by discarding all features
+ * associated with no sufficiently high weight values. For the network learner, we
+ * must check the weights across all the binary learners to determine the value
+ * of a particular feature.
+ * @author redman
+ */
+public class SparseNetworkOptimizer extends LexiconOptimizer {
+
+ /** the network learner we want to optimize. */
+ private SparseNetworkLearner networkLearner;
+
+ /**
+ * Given the sparse net learner to optimize.
+ * @param snl the sparse net learner.
+ */
+ public SparseNetworkOptimizer(SparseNetworkLearner snl) {
+ super(snl.demandLexicon(), snl.getBaseLTU().featurePruningThreshold);
+ networkLearner = snl;
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+     * @return true if the feature carries sufficient weight in any of the binary learners.
+ */
+ protected boolean hasWeight(Lexicon lex, Feature f) {
+ OVector net = networkLearner.getNetwork();
+ if (net.size() == 0)
+ return false;
+ int numberclasses = net.size();
+ int i = 0;
+ double sum = 0;
+ int featureindex = lex.lookup(f);
+
+ // we assume each element of the network is of the same type, if that type is sparse averaged
+ // perceptron, we check both the averaged and current weight
+ if (net.get(0) instanceof SparseAveragedPerceptron) {
+ for (; i < numberclasses; ++i) {
+ SparseAveragedPerceptron sap = (SparseAveragedPerceptron) net.get(i);
+ double wt = sap.getWeightVector().getRawWeights().get(featureindex);
+ double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex);
+ sum += Math.abs(wt);
+ sum += Math.abs(avg);
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (sum >= this.threshold)
+ return true;
+ }
+ } else {
+ for (; i < numberclasses; ++i) {
+ LinearThresholdUnit ltu = (LinearThresholdUnit) net.get(i);
+ double wt = ltu.getWeightVector().getRawWeights().get(featureindex);
+ sum += Math.abs(wt);
+
+ // if the value is sufficiently large, then we have a good weight and should keep.
+ if (sum >= this.threshold)
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * In this case, we must check, for each feature, the associated set of weight in each weight
+ * vector, if they are all very small, it is useless. The array returned is sorted ascending.
+ * @return the set of useless features.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures()
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ protected int[] identifyUselessFeatures() {
+ Lexicon lex = networkLearner.demandLexicon();
+ if (lex != null) {
+
+ // we have the conjunctive features, if left, right, or the parent itself has a non zero weight,
+            // consider none of the features (parent, left or right) useless; whitelist them.
+ HashSet whitelist = compileWhitelist(lex);
+ int count = 0;
+ int numberfeatures = lex.size();
+ int[] all = new int[numberfeatures];
+ TIntHashSet defunct = new TIntHashSet();
+
+            // For each feature, determine its value. We will iterate over a map with features as key
+ // and the integer index of the feature. If the feature is whitelisted, we keep, otherwise
+ // check for uselessness and if so, add to the list.
+ for (Object e : lex.getMap().entrySet()) {
+ Entry entry = (Entry) e;
+ if (!whitelist.contains(entry.getKey())) {
+ int fi = entry.getValue();
+ if (!hasWeight(lexicon, entry.getKey())) {
+ all[count] = fi;
+ if (defunct.contains(fi)) {
+ System.err.println("There was a feature discarded twice during feature pruning!");
+ } else {
+ defunct.add(fi);
+ }
+
+ this.uselessFeatureNames.add(entry.getKey());
+ count++;
+ }
+ }
+ }
+
+ int[] useless = new int[count];
+ System.arraycopy(all, 0, useless, 0, count);
+ Arrays.sort(useless);
+ return useless;
+ } else
+ return new int[0];
+ }
+
+ /**
+     * Now we remove the useless weights from ALL weight vectors. There must be the same number
+ * of entries in each weight vector as there is in the lexicon.
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[])
+ */
+ @Override
+ public void pruneWeights(int[] uselessfeatures, int origNumFeatures) {
+ OVector ltus = networkLearner.getNetwork();
+ for (int i = 0; i < ltus.size(); i++) {
+ LinearThresholdUnit ltu = (LinearThresholdUnit) ltus.get(i);
+ ltu.pruneWeights(uselessfeatures, origNumFeatures);
+ }
+ }
+}
\ No newline at end of file
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java
new file mode 100644
index 00000000..5b1fa976
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java
@@ -0,0 +1,125 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.*;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+import edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine;
+
+/**
+ * Optimized a support vector machine by discarding any sufficiently low weights.
+ * @author redman
+ */
+public class SupportVectorMachineOptimizer extends LexiconOptimizer {
+
+ /** the model we are going to optimize. */
+ private SupportVectorMachine svm = null;
+
+ /** the number of classes, if the numclasses is two, consider it binary and change to one. */
+ public int numberclasses = -1;
+
+ /** the biasfeatures are 0 for no added bias features, or 1 if bias is added. */
+ public int biasfeatures = 0;
+
+ /**
+ * Take lex and model, and optimize the model by pruning the weights. Any zero weights get pruned.
+ * @param lexicon the lexicon with the feature map.
+ * @param s the support vector machine.
+ */
+ public SupportVectorMachineOptimizer(SupportVectorMachine s) {
+ super(s.demandLexicon(), s.featurePruningThreshold);
+ this.svm = s;
+
+        // the numClasses field gets changed in the write method to allow for the binary case
+ // which is actually two classes to behave as one class (binary).
+ if (!s.getSolverType().equals("MCSVM_CS") && s.getNumClasses() <= 2)
+ numberclasses = 1;
+ else
+ numberclasses = s.getNumClasses();
+
+ // we need to figure out if we have a bias feature introduced
+ this.biasfeatures = svm.getBiasFeatures();
+ }
+
+ /**
+ * Determine if the provided feature has sum of weights greater than a threshold value,
+ * and discard the feature if it falls below.
+ * @param lex the lexicon.
+ * @param f the feature.
+     * @return true if the feature's summed weight across classes exceeds the pruning threshold.
+ */
+ protected boolean hasWeight(Lexicon lex, Feature f) {
+ int index = lex.lookup(f);
+ return getWeight(index) > this.threshold;
+ }
+
+ /**
+ * Compute the single weight at the index as the sum of all weights for all classes.
+ * @param index the index of the feature
+ * @return the sum of the absolute value of all weights for the feature.
+ */
+ private double getWeight(int index) {
+ double sum = 0;
+ for (int i = 0; i < this.numberclasses; i++) {
+ sum += Math.abs(svm.getWeights()[index]);
+ index += (this.lexicon.size() + biasfeatures);
+ }
+ return sum;
+ }
+
+ /**
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures()
+ */
+ @Override
+ protected int[] identifyUselessFeatures() {
+
+ // compile the whitelist
+ HashSet whitelist = compileWhitelist(lexicon);
+
+ // look at each feature in the lexicon, any with zero weights can be safely discarded.
+ int [] all = new int [this.lexicon.size()];
+ int count = 0;
+ for (Object e : lexicon.getMap().entrySet()) {
+ @SuppressWarnings("unchecked")
+ Entry entry = (Entry) e;
+ if (!whitelist.contains(entry.getKey())) {
+ int fi = entry.getValue();
+ double wt = getWeight(fi);
+ if (wt < this.threshold) {
+ all[count] = fi;
+ count++;
+ }
+ }
+ }
+ int[] useless = new int[count];
+ System.arraycopy(all, 0, useless, 0, count);
+ Arrays.sort(useless);
+ return useless;
+ }
+
+ /**
+ * This method returns the number of features. This implementation assumes the
+ * lexicon is populated, but that's not always the case (with SVM for example appears
+     * to not always have a populated lexicon). In these cases, this method may be overridden.
+     * @return the number of features.
+ */
+ protected int getNumberFeatures() {
+ return this.svm.getNumFeatures();
+ }
+
+ /**
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[], int)
+ */
+ @Override
+ public void pruneWeights(int[] uselessfeatures, int originalNumFeatures) {
+ this.svm.pruneWeights(uselessfeatures, originalNumFeatures);
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java
new file mode 100644
index 00000000..c9b185bc
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java
@@ -0,0 +1,38 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+/**
+ * For sparse learners, it is often the case that the array of features you learn
+ * contains only a subset of useful features. When we leave these features in the lexicon,
+ * we end up with bloated lexicons and weight vectors. This leads to larger than necessary
+ * models.
+ *
+ * This package contains an interface that defines the life cycle for the feature pruning
+ * process, as well as some implementations, one that takes multiple weight vectors (for
+ * multi-class network learners), and some that takes only one weight vector.
+ *
+ * All optimizers should subclass @see LexiconOptimizer which implements most of the
+ * optimization. Subclass will need to provide methods to compute the weight value to compare
+ * against the threshold, a method to identify the useless features, and a method to prune
+ * those features.
+ *
+ * The optimizers are invoked by the {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#doneTraining}
+ * method of the Learner class when all learning is complete. For those learners that include a feature
+ * pruning implementation, they must override this method to invoke the optimizer. In this way, during the
+ * normal LBJava compile and model build cycle, the optimization is performed automatically. For those
+ * who have built their own training procedure, they are required to invoke the doneTraining and
+ * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#beginTraining} method at appropriate points during
+ * their training process.
+ *
+ * The learner classes typically have a parameter that can be set to change the default feature
+ * pruning threshold to any the user might choose, or it can be set to 0.0 to disable.
+ *
+ * The pruning threshold value is provided by the specific learner, and should be, in one way or
+ * another, parameterized.
+ * @author redman
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
\ No newline at end of file
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java
new file mode 100644
index 00000000..20929909
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java
@@ -0,0 +1,71 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range;
+
+/**
+ * This thread will compute a single activation value; for each layer
+ * setup must be called to provide the output array, the layer and the
+ * input values.
+ * @author redman
+ */
+class ActThread extends PushThread {
+
+ /** the input data. */
+ float[] currentInputs = null;
+
+ /** the layer we are operating on. */
+ Layer layer = null;
+
+ /** the resulting outputs are stored here, this array is shared
+ * by all threads activating on this layer. */
+ float [] layerActivations = null;
+
+ /** used to make the name of the thread unique. */
+ private static int inc = 0;
+
+ /**
+     * Construct an activation thread with a unique, auto-incremented name.
+     * Per-layer state is supplied later via setup.
+ */
+ ActThread() {
+ super("ActThread-"+(inc++));
+ }
+
+ /**
+ * before we start a layer, this is called to set up the thread.
+ * @param ci the input data.
+ * @param l the layer.
+     * @param la the layer activation values.
+     *
+ */
+ void setup(float[] ci, Layer l, float[] la) {
+ this.currentInputs = ci;
+ this.layer = l;
+ this.layerActivations = la;
+ }
+
+ /**
+     * Pull ranges and compute activations until a null range signals shutdown.
+ */
+ public void run() {
+ synchronized (this) {
+ while(true) {
+
+ // wait for the range object to be set.
+ Range r = this.getRange();
+ if (r == null)
+ return;
+ for (int indx = r.start; indx < r.end; indx++) {
+ layerActivations[indx] = layer.computeOneOutput(indx, currentInputs);
+ }
+ }
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java
new file mode 100644
index 00000000..74b25f0a
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java
@@ -0,0 +1,25 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Implementations will activate all the layers of the net and
+ * produce a set of outputs. The one required method will return
+ * all the output values.
+ * @author redman
+ */
+public interface Activator {
+
+ /**
+     * Run the inputs through the layers of the net, returning the final outputs.
+     * Implementations determine how the layers are traversed.
+     * @param inputs the input data.
+ * @return the output values.
+ */
+ public float[] prediction(float[] inputs);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java
new file mode 100644
index 00000000..8237a18b
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java
@@ -0,0 +1,27 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Measure convergence, simplest implementation may simply run some number of epochs,
+ * more sophisticated ones will look for some function of cumulative error going to zero at the
+ * end of an epoch. Convergence is always measured at the end of a training cycle.
+ * @author redman
+ */
+public interface ConvergenceMeasure {
+
+ /**
+ * With the given inputs and outputs, evaluate the results of the last iteration,
+ * determine the error, probably store that, and if convergence (what whatever measure)
+ * is achieved, return true, else return false.
+ *
+     * @param learner the learner being used to train up the neural net, contains the cumulative error.
+ * @return true if converged.
+ */
+ public boolean evaluate(NNTrainingInterface learner);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java
new file mode 100644
index 00000000..acd54807
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java
@@ -0,0 +1,98 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.InputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * @author redman
+ *
+ */
+public class DatasetReader {
+
+ /**
+ * flip the byte order.
+ * @param is input stream.
+ * @return the integer.
+ * @throws IOException
+ */
+ private static int readInt(InputStream is) throws IOException {
+ int i0 = is.read();
+ int i1 = is.read();
+ int i2 = is.read();
+ int i3 = is.read();
+ return (i0<<24) + (i1<<16) + (i2<<8) + i3;
+ }
+
+ /**
+     * get the examples from an NIST dataset, return everything at once. There are
+ * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats =
+ * 47 million floats. These are input examples, so they are image data.
+ * @param filename
+ * @return the input examples.
+ * @throws IOException
+ */
+ public static float[][] getExampleInputs(String filename) throws IOException {
+ InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename)));
+ int m1 = readInt(dis);
+ if (m1 != 2051)
+ throw new IOException("That was not an example file! magic code = "+m1);
+ int numExamples = readInt(dis);
+ if (numExamples != 60000)
+ System.out.println("We expecting 60k examples "+m1);
+ int numRows = readInt(dis);
+ if (numRows != 28)
+ System.out.println("We expecting 28 rows "+numRows);
+ int numColumns = readInt(dis);
+ if (numColumns != 28)
+ System.out.println("We expecting 28 columns "+numColumns);
+ int totalpixels = numRows*numColumns;
+ float [][] examples = new float [numExamples][totalpixels];
+ for (int i = 0 ; i < examples.length; i++) {
+ for (int j = 0; j < totalpixels; j++) {
+ examples[i][j] = (float)(dis.read()/128f) - 1f;
+ }
+ }
+ return examples;
+ }
+
+ /**
+     * get the labels from an NIST dataset, return everything at once. There is
+     * one label byte per example. These are output labels (class values), not
+     * image data; each is scaled into roughly the [-1, 1] range.
+ * @param filename
+ * @return the output examples.
+ * @throws IOException
+ */
+ public static float[][] getExampleOutputs(String filename) throws IOException {
+ InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename)));
+ int m1 = readInt(dis);
+ if (m1 != 2049)
+ throw new IOException("That was not an example file! magic code = "+m1);
+ int numExamples = readInt(dis);
+ float [][] examples = new float [numExamples][1];
+ for (int i = 0 ; i < numExamples; i++) {
+ examples[i][0] = (float)(dis.read()/5f) - 1f;
+ }
+ return examples;
+ }
+
+ /**
+ * @param a
+ * @throws IOException
+ */
+ @SuppressWarnings("unused")
+ public static void main(String[]a) throws IOException {
+ float[][] examples = getExampleInputs("/Users/redman/Desktop/NNTrainingData/train-images-idx3-ubyte");
+ float[][] labels = getExampleOutputs("/Users/redman/Desktop/NNTrainingData/train-labels-idx1-ubyte");
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java
new file mode 100644
index 00000000..a794cf6a
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java
@@ -0,0 +1,42 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Terminate after so many iterations.
+ * @author redman
+ */
+public class EpochConvergenceMeasure implements ConvergenceMeasure {
+
+ /** the current epoch count. */
+ private int epoch = 0;
+
+    /** the maximum number of epochs. */
+ private int max;
+
+ /**
+ * Takes the number of iterations.
+ * @param m the max iterations.
+ */
+ public EpochConvergenceMeasure(int m) {
+ this.max = m;
+ }
+
+ /**
+ * @see edu.illinois.cs.cogcomp.lbjava.neuralnet.ConvergenceMeasure#evaluate(edu.illinois.cs.cogcomp.lbjava.neuralnet.NNTrainingInterface)
+ */
+ @Override
+ public boolean evaluate(NNTrainingInterface learner) {
+ epoch++;
+ if (epoch > max) {
+ return true;
+ } else
+ return false;
+ }
+
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java
new file mode 100644
index 00000000..29adfeb0
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java
@@ -0,0 +1,277 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.io.PrintStream;
+import java.util.Random;
+
+/**
+ * This is a layer in a neural net. it is characterized by a number of inputs
+ * and a number of outputs. The neurons (perceptrons) are hidden within, I see
+ * no reason to expose them. this allows the layer class to do all computations
+ * across the entire layer in one pass, which is very efficient. Downside; Nothing
+ * in this implementation will allow you to assign per neuron attributes. Also, the
+ * weights are represented by a primitive array, so only 32 bit indices meaning no
+ * more than 2 ^ 32 weights are allowed.
+ * @author redman
+ */
+public class Layer {
+
+ /** number of inputs to this layer. */
+ private int numberInputs;
+
+ /** the number of outputs from this layer. */
+ private int numberOutputs;
+
+ /** the neuron weights. */
+ private float[] weights;
+
+ /** the derived outputs. */
+ private float[] dweights;
+
+ /** collects output values. */
+ private float[] outputs;
+
+ /**
+ * The layer constructed with a default random number source.
+ * @param numIn the number of inputs.
+ * @param numOut the number of outputs.
+ */
+ public Layer(int numIn, int numOut) {
+ // the delegated constructor already allocates the outputs buffer.
+ this(numIn, numOut, new Random());
+ }
+
+ /**
+ * The layer constructed.
+ * @param numIn the number of inputs.
+ * @param numOut the number of outputs.
+ * @param r the random num generator.
+ */
+ public Layer(int numIn, int numOut, Random r) {
+ this.numberInputs = numIn;
+ this.numberOutputs = numOut;
+ int wl = (numIn+1)*numOut;
+ weights = new float[wl];
+ dweights = new float[wl];
+ for (int i = 0; i < wl; i++)
+ weights [i] = (r.nextFloat() - 0.5f) * 4f;
+ outputs = new float[numOut];
+ }
+
+ /**
+ * Compute the logistic sigmoid activation (not its derivative).
+ * @param x the input value
+ * @return the sigmoid
+ */
+ final private float sigmoid(float x) {
+ return (float) (1.0 / (1.0 + Math.exp(-x)));
+ }
+
+ /**
+ * @return the weights
+ */
+ public float[] getWeights() {
+ return weights;
+ }
+
+ /**
+ * @param weights the weights to set
+ */
+ public void setWeights(float[] weights) {
+ this.weights = weights;
+ }
+
+ /**
+ * @return the numberInputs
+ */
+ public int getNumberInputs() {
+ return numberInputs;
+ }
+
+ /**
+ * @param numberInputs the numberInputs to set
+ */
+ public void setNumberInputs(int numberInputs) {
+ this.numberInputs = numberInputs;
+ }
+
+ /**
+ * @return the numberOutputs
+ */
+ public int getNumberOutputs() {
+ return numberOutputs;
+ }
+
+ /**
+ * @param numberOutputs the numberOutputs to set
+ */
+ public void setNumberOutputs(int numberOutputs) {
+ this.numberOutputs = numberOutputs;
+ }
+
+ /**
+ * Compute the activation for one output neuron; broken out this
+ * finely only so the work can be parallelized across threads.
+ * @param index the index of the output neuron to compute.
+ * @param inputs the inputs.
+ * @return the activation output.
+ */
+ final float computeOneOutput(int index, float[] inputs) {
+ float result = 0.0f;
+ int nI = this.numberInputs;
+ int start = index * (nI+1);
+ for (int k = 0 ; k < nI ; k++) {
+ result += weights[start+k] * inputs[k];
+ }
+ result += weights[start+nI]; // bias weight, implicit input of 1.
+ return sigmoid(result);
+ }
+
+ /**
+ * Given a set of inputs, produce the set of activation values. Note the
+ * returned array is this layer's shared buffer, overwritten on the next call.
+ * @param inputs the inputs to produce the predictions for.
+ * @return the set of predictions.
+ */
+ final public float[] activate(float[] inputs) {
+ int nO = this.numberOutputs;
+ float[] o = this.outputs;
+ for (int j = 0 ; j < nO ; j++) {
+ o[j] = this.computeOneOutput(j, inputs);
+ }
+ return outputs;
+ }
+
+ /**
+ * train up weights for just one output. Thread safety must be noted here, since everybody will be
+ * updating the nextError array at the same time. To avoid doing repeated synchronizations which are
+ * expensive here, for multithreaded trainer, we pass in a dummy error array, update at will, then
+ * the caller is responsible for synchronizing on the real one and updating the shared sum error array.
+ * @param error the activation errors used to compute the backprop value.
+ * @param input the input data.
+ * @param output the computed output data.
+ * @param learningRate the learning rate.
+ * @param momentum the momentum.
+ * @param nextError the array where the backpropagated error values will be accumulated.
+ * @param outIndex the index of the output neuron to train.
+ */
+ final public void trainOne(float[] error, float[] input, float[] output, float learningRate, float momentum, float[] nextError, int outIndex) {
+ int woffset = (this.numberInputs+1) * outIndex;
+ float d = error[outIndex] * (output[outIndex] * (1 - output[outIndex]));
+ for (int j = 0; j < this.numberInputs; j++) {
+ int windx = woffset + j;
+ nextError[j] += weights[windx] * d;
+ float dw = input[j] * d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+
+ // compute the error for the bias, the fake bias input is always 1.
+ int windx = woffset + input.length;
+ nextError[input.length] += weights[windx] * d;
+ float dw = d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+
+ /**
+ * Given a set of errors (errors from the next layer on), adjust the
+ * weights to perform one gradient descent step.
+ * @param error the output errors.
+ * @param input the input data.
+ * @param output the computed activation output of this layer (not the labels).
+ * @param learningRate the rate of learning.
+ * @param momentum helps to avoid local minima.
+ * @return the backpropagated errors from this layer, one per input plus bias.
+ */
+ final public float[] train(float[] error, float[] input, float[] output, float learningRate, float momentum) {
+ int nI = this.numberInputs+1/*for the bias*/;
+ float[] nextError = new float[nI];
+ for (int i = 0; i < this.numberOutputs; i++) {
+ //this.trainOne(error, input, output, learningRate, momentum, nextError, i);
+
+ int woffset = nI * i;
+ float d = error[i] * (output[i] * (1 - output[i]));
+ for (int j = 0; j < this.numberInputs; j++) {
+ int windx = woffset + j;
+ nextError[j] += weights[windx] * d;
+ float dw = input[j] * d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+
+ // compute the error for the bias, the fake bias input is always 1.
+ int windx = woffset + input.length;
+ nextError[input.length] += weights[windx] * d;
+ float dw = d * learningRate;
+ weights[windx] += dweights[windx] * momentum + dw;
+ dweights[windx] = dw;
+ }
+ return nextError;
+ }
+
+ /**
+ * print out the weights.
+ */
+ public void print() {
+ System.out.print(this.numberInputs+":"+this.numberOutputs);
+ System.out.print(" ");
+ for (float w : weights) {
+ System.out.format(" %.8f",w);
+ }
+ System.out.print(" (");
+ for (float w : dweights) {
+ System.out.format(" %.8f",w);
+ }
+ System.out.println(")");
+ }
+
+ /**
+ * @return the dweights
+ */
+ public float[] getDweights() {
+ return dweights;
+ }
+
+ /**
+ * @param dweights the dweights to set
+ */
+ public void setDweights(float[] dweights) {
+ this.dweights = dweights;
+ }
+
+ /**
+ * used for reporting mostly; each row is one neuron's weights plus its bias.
+ */
+ public String toString() {
+ StringBuffer sb = new StringBuffer();
+ sb.append("in : "+this.numberInputs+" out : "+this.numberOutputs);
+ sb.append("\n");
+ for (int i = 0; i < weights.length;) {
+ for (int j = 0; j < this.numberInputs+1;j++,i++) {
+ sb.append(" "+weights[i]);
+ sb.append(" ");
+ }
+ sb.append("\n");
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Write the representation one value per line (bare print() would fuse the numbers).
+ * @param out the output stream for serialization.
+ */
+ public void write(PrintStream out) {
+ out.println(numberInputs);
+ out.println(numberOutputs);
+ out.println(weights.length);
+ for (int i = 0; i < weights.length; ++i)
+ out.println(weights[i]);
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java
new file mode 100644
index 00000000..d3568330
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java
@@ -0,0 +1,96 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.util.Arrays;
+
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range;
+
+/**
+ * This thread will compute a single activtion value, for each layer
+ * setup must be called to provide the output array, the layer and the
+ * input values.
+ * @author redman
+ */
+class LearnerThread extends PushThread {
+
+ /** the input error from the next layer being back propogated. */
+ float[] error = null;
+
+ /** the input labeled data. */
+ float[] input = null;
+
+ /** the input data. */
+ float[] output = null;
+
+ /** the result error SHARED ACROSS THREADS, must be synced to update. */
+ float [] nextError;
+
+ /** the space where updates to the errors will be set, later used to update nextError.*/
+ float [] errorWorkspace;
+
+ /** the learning rate. */
+ float learnRate;
+
+ /** the momentum. */
+ float momentum;
+
+ /** the layer we are operating on. */
+ Layer layer = null;;
+
+ /** the unique id. */
+ private static int inc = 0;
+
+ /**
+ * The learning rate and momentum will not change, so we will take them initially.
+ * @param lR the learning rate.
+ * @param m the momentum.
+ * @param mux the multiplexer.
+ */
+ LearnerThread(float lR, float m) {
+ super("LearnerThread-"+(inc++));
+ this.learnRate = lR;
+ this.momentum = m;
+ }
+
+ /**
+ * before we start a layer, this is called to set up the thread.
+ * @param error the error from the next layer, used to calc this layers error.
+ * @param input the input data.
+ * @param output the result data.
+ * @param nextError put the next layers input error here.
+ * @param layer the layer we operate on.
+ */
+ void setup(float [] error, float [] input, float [] output, float[] nextError, Layer layer) {
+ this.error = error;
+ this.input = input;
+ this.output = output;
+ this.nextError = nextError;
+ this.layer = layer;
+ // a freshly allocated array is already zeroed by the JVM; no explicit fill needed.
+ this.errorWorkspace = new float[nextError.length];
+ }
+
+ /**
+ * Train each assigned neuron range until a null range signals shutdown.
+ */
+ public void run() {
+ synchronized (this) {
+ while(true) {
+
+ // block until a range arrives; null means done() was called.
+ Range r = this.getRange();
+ if (r == null)
+ return;
+ for (int indx = r.start; indx < r.end; indx++) {
+ layer.trainOne(error, input, output, learnRate, momentum, errorWorkspace, indx);
+ }
+ }
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java
new file mode 100644
index 00000000..84e29922
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java
@@ -0,0 +1,23 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+/**
+ * Contract implemented by all neural-net trainers.
+ * @author redman
+ */
+public interface NNTrainingInterface {
+ /**
+ * Given a set of examples, and a set of desired outputs, train the network
+ * represented by the provided network layers for the provided number of epochs.
+ * @param inputs the input data to train against.
+ * @param outputs the desired outputs.
+ * @param epochs the number of training iterations to run.
+ */
+ public void train(float[][] inputs, float[][]outputs, int epochs);
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java
new file mode 100644
index 00000000..9dada667
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java
@@ -0,0 +1,84 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range;
+
+/**
+ * Threads will operate on a range, this superclass contains that
+ * range and handles atomic synchronized access.
+ * @author redman
+ */
+public class PushThread extends Thread {
+
+ /** the range to operate on. */
+ protected Range range = null;
+
+ /** set when this thread is waiting for input. */
+ private boolean idle = false;
+ /**
+ * the push thread takes the name ofthe thread, to pass to
+ * the super.
+ * @param name the name of the thread.
+ */
+ PushThread(String name) {
+ super(name);
+ }
+
+ /**
+ * set the range of things to operate on.
+ * @param range
+ */
+ synchronized void setRange(Range range) {
+ this.range = range;
+ this.notifyAll();
+ }
+
+ /**
+ * Shut this thread down: clear any pending range and interrupt getRange().
+ */
+ synchronized void done() {
+ this.range = null;
+ this.interrupt();
+ }
+
+ /**
+ * wait for the thread to complete it's run, it will set
+ * poised and block till it gets data.
+ */
+ final synchronized public void waitIdle() {
+ while(!idle || range != null)
+ try {
+ this.wait();
+ } catch (InterruptedException e) {
+ }
+ }
+
+ /**
+ * wait for the next range.
+ * @return the next range, or null if this thread was shut down.
+ */
+ final synchronized protected Range getRange() {
+ while (range == null)
+ try {
+ this.idle = true;
+ this.notify(); // somebody waiting for completion?
+ this.wait();
+ } catch (InterruptedException e) {
+ // wait() clears the interrupt flag before throwing, so testing
+ // isInterrupted() here would always be false. Interrupts are only
+ // delivered by done(), so treat one as the shutdown signal.
+ return null;
+ }
+ Range r = range;
+ range = null;
+ this.idle = false;
+ return r;
+ }
+
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java
new file mode 100644
index 00000000..951c3144
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java
@@ -0,0 +1,150 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.util.Random;
+
+/**
+ * This class will simply learn up the NeuralNet layers, single threaded.
+ * @author redman
+ */
+public class SimpleNNTrainer implements NNTrainingInterface {
+
+ /** the layers of the neural network. */
+ private Layer[] layers;
+
+ /** scales the weight deltas for each iteration. */
+ private float learningRate = .3f;
+
+ /** this prevents local minimum capture. */
+ private float momentum = .6f;
+
+ /**
+ * Need the layer data, learning rate and momentum.
+ * @param l the layers of the neural net.
+ * @param rate the learning rate.
+ * @param mom the momentum.
+ */
+ public SimpleNNTrainer(Layer[] l, float rate, float mom) {
+ this.layers = l;
+ this.learningRate = rate;
+ this.momentum = mom;
+ }
+
+ /**
+ * Given one input example, run a forward pass through every layer.
+ * Unlike activate(), only the final layer's outputs (the network's
+ * prediction) are returned; intermediate activations are discarded.
+ * @param inputs the inputs.
+ * @return the output layer's activation energies.
+ */
+ public final float [] classify(float[] inputs) {
+
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = new float[layerCount][];
+
+ // This array contains inputs from previous layer
+ float [] currentinputs = inputs;
+ for (int i = 0 ; i < layerCount ; i++) {
+
+ // compute the activations for this layer.
+ Layer layer = layers[i];
+ activations[i] = layer.activate(currentinputs);
+ currentinputs = activations[i];
+ }
+ return activations[layerCount-1];
+ }
+
+ /**
+ * Given one input example, compute the output values of every layer;
+ * the network's final prediction is the last vector in the returned
+ * array. Each vector is its layer's shared buffer (overwritten per call).
+ * @param inputs the inputs.
+ * @return the activation energies from all layers.
+ */
+ public final float [][] activate(float[] inputs) {
+
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = new float[layerCount][];
+
+ // This array contains inputs from previous layer
+ float [] currentinputs = inputs;
+ for (int i = 0 ; i < layerCount ; i++) {
+
+ // compute the activations for this layer.
+ Layer layer = layers[i];
+ activations[i] = layer.activate(currentinputs);
+ currentinputs = activations[i];
+ }
+ return activations;
+ }
+
+ /**
+ * Train with a single example: one forward pass, then backprop
+ * through every layer adjusting weights in place.
+ * @param inputs input data.
+ * @param outputs the labeled data.
+ */
+ public void train(float[] inputs, float[]outputs) {
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = this.activate(inputs);
+
+ // now we have all the activations.
+ float[] calcOut = activations[activations.length-1];
+ int errlen = calcOut.length;
+ float [] error = new float[errlen];
+ for (int i = 0; i < errlen; i++) {
+ error[i] = outputs[i] - calcOut[i]; // negative error
+ }
+ for (int i = layers.length - 1; i > 0; i--) {
+ error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum);
+ }
+ error = layers[0].train(error,inputs, activations[0], this.learningRate, this.momentum);
+ }
+ /**
+ * Execute the given number of epochs, then exit whatever the error.
+ * @param inputs the input examples.
+ * @param outputs the labels.
+ * @param epochs the number of passes made over the training data.
+ */
+ @Override
+ public void train(float[][] inputs, float[][]outputs, int epochs) {
+
+ // error checking.
+ if (inputs.length != outputs.length)
+ throw new RuntimeException("There must be the same number of input data records and output data records.");
+ int totalInputs = inputs.length;
+
+ // set up our counts.
+ int layerCount = layers.length;
+ Random r = new Random(34565);
+ for (int epoch = 0; epoch < epochs; epoch++) {
+ for (int inindx = 0; inindx < totalInputs; inindx++) {
+ int iI = r.nextInt(totalInputs);
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = this.activate(inputs[iI]);
+
+ // now we have all the activations.
+ float[] calcOut = activations[layerCount-1];
+ int errlen = calcOut.length;
+ float [] error = new float[errlen];
+ for (int i = 0; i < errlen; i++) {
+ error[i] = outputs[iI][i] - calcOut[i]; // negative error
+ }
+ for (int i = layers.length - 1; i > 0; i--) {
+ error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum);
+ }
+ error = layers[0].train(error,inputs[iI],activations[0], this.learningRate, this.momentum);
+ }
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java
new file mode 100644
index 00000000..01980fb2
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java
@@ -0,0 +1,338 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
+
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * This class will simply learn up the NeuralNet layers, single threaded.
+ * @author redman
+ */
+public class ThreadedNNTrainer implements NNTrainingInterface {
+
+ /** the layers of the neural network. */
+ private Layer[] layers;
+
+ /** scales the weight deltas for each iteration. */
+ private float learningRate = .3f;
+
+ /** this prevents local minimum capture. */
+ private float momentum = .6f;
+
+ /** this is the number of threads we will use, by default, number of processors on the machine. */
+ private int numThreads = Runtime.getRuntime().availableProcessors();
+
+ /**
+ * Need the layer data, learning rate and momentum.
+ * @param l the layers of the neural net.
+ * @param rate the learning rate.
+ * @param mom the momentum.
+ */
+ public ThreadedNNTrainer(Layer[] l, float rate, float mom) {
+ this.layers = l;
+ this.learningRate = rate;
+ this.momentum = mom;
+ }
+
+ /**
+ * Need the layer data, learning rate and momentum.
+ * @param l the layers of the neural net.
+ * @param rate the learning rate.
+ * @param mom the momentum.
+ * @param numThreads number of threads to deploy.
+ */
+ public ThreadedNNTrainer(Layer[] l, float rate, float mom, int numThreads) {
+ this.layers = l;
+ this.learningRate = rate;
+ this.momentum = mom;
+ this.numThreads = numThreads;
+ }
+
+ /**
+ * Given one input example, compute the output values of every layer;
+ * the network's final prediction is the last vector in the returned
+ * array. Each vector is its layer's shared buffer (overwritten per call).
+ * @param inputs the inputs.
+ * @return the activation energies from all layers.
+ */
+ public final float [][] activate(float[] inputs) {
+
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] activations = new float[layerCount][];
+
+ // This array contains inputs from previous layer
+ float [] currentinputs = inputs;
+ for (int i = 0 ; i < layerCount ; i++) {
+
+ // compute the activations for this layer.
+ Layer layer = layers[i];
+ activations[i] = layer.activate(currentinputs);
+ currentinputs = activations[i];
+ }
+ return activations;
+ }
+
+ /**
+ * Execute the given number of epochs, then exit whatever the error.
+ * @param inputs the input examples.
+ * @param outputs the labels.
+ * @param epochs the number of iterations to perform.
+ */
+ @Override
+ final public void train(float[][] inputs, float[][] outputs, int epochs) {
+ // error checking.
+ if (inputs.length != outputs.length)
+ throw new RuntimeException("There must be the same number of input data records and output data records.");
+
+ // iterate this number of times.
+ int numExamples = inputs.length;
+
+ // For each layer, compute the ranges of indices to operate on. This will allow us to
+ // continue computing on a thread without handshakes.
+ int ll = layers.length;
+ Range[][] ranges = new Range[ll][];
+ for (int i = 0; i < ll ; i++) {
+ Layer l = layers[i];
+ int no = l.getNumberOutputs();
+ int increment = no / numThreads;
+ int onsies;
+ if (increment == 0) {
+ onsies = no;
+ ranges[i] = new Range[onsies];
+ } else {
+ onsies = no % numThreads;
+ ranges[i] = new Range[numThreads];
+ }
+ int start = 0;
+ for (int j = 0 ; j < ranges[i].length && start < no; j++) {
+ int end = start + increment;
+ if (onsies != 0) {
+ end++;
+ onsies--;
+ }
+ ranges[i][j] = new Range(start, end);
+ start = end;
+ }
+ }
+
+ // create the threads to run against the activation mux.
+ ActThread[] actThreads = new ActThread[numThreads];
+ for (int i = 0; i < numThreads; i++) {
+ actThreads[i] = new ActThread();
+ actThreads[i].start();
+ }
+
+ // create the threads to run against the activation mux.
+ LearnerThread[] learnerThreads = new LearnerThread[numThreads];
+ for (int i = 0; i < numThreads; i++) {
+ learnerThreads[i] = new LearnerThread(this.learningRate, this.momentum);
+ learnerThreads[i].start();
+ }
+
+ // set up our counts.
+ int layerCount = layers.length;
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float[][] activations = new float[layerCount][];
+ for (int i = 0; i < layerCount; i++) {
+ activations[i] = new float[layers[i].getNumberOutputs()];
+ }
+
+ Thread.yield();
+ Thread.yield();
+ Thread.yield();
+ Random r = new Random(34565);
+
+ // do the specified number of epochs.
+ for (int epoch = 0; epoch < epochs; epoch++) {
+ for (int inindx = 0; inindx < numExamples; inindx++) {
+ int iI = r.nextInt(numExamples);
+
+ // zero activations
+ for (int i = 0; i < layerCount; i++) {
+ Arrays.fill(activations[i], 0.0f);
+ }
+
+ // This array contains inputs from previous layer output
+ float[] currentinputs = inputs[iI];
+
+ // for each layer, do the activations.
+ for (int i = 0; i < layerCount; i++) {
+ Layer layer = layers[i];
+
+ // set up the threads
+ float[] acts = activations[i];
+ int rl = ranges[i].length;
+ for (int j = 0; j < rl; j++) {
+ actThreads[j].setup(currentinputs, layer, acts);
+ actThreads[j].setRange(ranges[i][j]);
+ }
+
+ // wait for them to finish.
+ for (int j = 0; j < rl; j++) {
+ actThreads[j].waitIdle();
+ }
+ currentinputs = acts;
+ }
+
+ //////////////////////////////////
+ // compute output errors.
+ // now we have all the activations, lets do error propogation.
+ float[] calcOut = activations[layerCount - 1];
+ int errlen = calcOut.length;
+ float[] error = new float[errlen];
+ for (int i = 0; i < errlen; i++) {
+ error[i] = outputs[iI][i] - calcOut[i]; // negative error
+ }
+
+ //////////////////////////////////
+ // propogate the errors back and adjust weights.
+ // now learn from out errors.
+ for (int i = layerCount - 1; i > 0; i--) {
+ Layer layer = layers[i];
+ int nI = layer.getNumberInputs() + 1/*for the bias*/;
+ float[] nextError = new float[nI];
+
+ // set up the threads
+ int rl = ranges[i].length;
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].setup(error, activations[i - 1], activations[i], nextError, layer);
+ learnerThreads[j].setRange(ranges[i][j]);
+ }
+
+ // wait for complete, then set up next layer run.
+ // wait for them to finish.
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].waitIdle();
+ }
+
+ // now we must sum all the errors for each of the threads.
+ int esize = nextError.length;
+ for (int ei = 0; ei < esize; ei++) {
+ for (int j = 0; j < rl; j++) {
+ nextError[ei] += learnerThreads[j].errorWorkspace[ei];
+ }
+ }
+ error = nextError;
+ }
+
+ // The setup for the first layer is computed using the actual inputs, so we do this
+ // a bit differently.
+ Layer layer = layers[0];
+ int rl = ranges[0].length;
+ int nI = layer.getNumberInputs() + 1/*for the bias*/;
+ float[] nextError = new float[nI];
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].setup(error, inputs[iI], activations[0], nextError, layer);
+ learnerThreads[j].setRange(ranges[0][j]);
+ }
+
+ // wait for complete, then set up next layer run.
+ // wait for them to finish.
+ for (int j = 0; j < rl; j++) {
+ learnerThreads[j].waitIdle();
+ }
+ }
+
+ // check for convergence.
+ float sumerr = 0;
+ for (int inputIdx = 0; inputIdx < outputs.length; inputIdx++) {
+
+ // storage for each output of each layer, and the error computed for each activation.
+ float [][] a = this.activate(inputs[inputIdx]);
+ float[] outs = a[layerCount-1];
+ float pred = outs[0];
+ float label = outputs[inputIdx][0];
+ sumerr = pred > label ? pred - label : label - pred;
+ }
+ System.out.format("%d) error = %.18f\n",epoch,(sumerr/(float)outputs.length));
+ }
+ }
+
+ /** A half-open [start, end) range of output-neuron indices for one worker. */
+ static class Range {
+ int start;
+ int end;
+ Range(int s, int e) {
+ start = s;
+ end = e;
+ }
+ public String toString() {
+ return start+"-"+end;
+ }
+ }
+
+ /**
+ * this class coordinates the activities of a set of threads by handing
+ * out indexes that need to be operated on in a threadsafe way. If a request
+ * is made for an index, and none are available, the thread will wait until notified.
+ * @author redman
+ */
+ static class Multiplexer {
+
+ /** these are the ranges for the layer we operate on, these inited once and reused each epoch. */
+ private Range[] ranges = null;
+
+ /** index of the next range to hand out (counts up through ranges). */
+ private int count = 0;
+
+ /** number of threads currently blocked waiting for work. */
+ private int waiting = 0;
+
+ /** the number of threads sharing this multiplexer. */
+ private int numThreads = 0;
+
+ /**
+ * We need the number of elements in the layer to operate on.
+ * @param numThreads the total number of threads.
+ */
+ Multiplexer(int numThreads) {
+ this.numThreads = numThreads;
+ }
+
+ /**
+ * Start this process. This should be called by the main thread where
+ * coordination occurs. Hands the ranges to the workers and then blocks
+ * until every worker thread is waiting again (all ranges consumed).
+ * @param ranges the range of indices to operate on.
+ */
+ synchronized void startAndWait(Range[] ranges) {
+ this.count = 0;
+ this.ranges = ranges;
+ this.waiting = 0;
+ this.notifyAll();
+ while (waiting != numThreads) {
+ try {
+ this.wait();
+ } catch (InterruptedException e1) {
+ }
+ }
+ }
+
+ /**
+ * get the next available range, or block till one is available.
+ * @return the next range.
+ */
+ synchronized Range getNextIndex() {
+ while (ranges == null || count == ranges.length) {
+ try {
+ this.waiting++;
+ if (waiting == numThreads)
+ this.notifyAll();
+ this.wait();
+ this.waiting--;
+ } catch (InterruptedException e) {
+ }
+ }
+ return ranges[count++];
+ }
+ }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java
new file mode 100644
index 00000000..c8623369
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java
@@ -0,0 +1,21 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+/**
+ * This package contains the Neural Network implementation employed by LBJava. This
+ * implementation supports bias, momentum and back prop, and is designed with
+ * efficiency in mind. The implementation contract includes an API for trainers
+ * {@link NNTrainingInterface} that defines the API for any trainer. A single
+ * threaded trainer is provided. There is also a multithreaded trainer, which helps
+ * when there are a very large number of weights between layers.
+ *
+ * There is also a {@link Layer} class which implements functionality specific
+ * to neural net layers within the system. However, there is no representation of a
+ * neuron within the API; this was decided upon to ensure good performance.
+ * @author redman
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java
index 67545bf3..b530eb9a 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java
@@ -7,6 +7,7 @@
*/
package edu.illinois.cs.cogcomp.lbjava.util;
+import java.lang.reflect.Array;
import java.util.Arrays;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream;
@@ -190,6 +191,32 @@ public Feature remove(int i) {
}
+ /**
+ * Remove all the features specified by the indices. This is MUCH faster
+ * than removing them one at a time.
+ *
+ * @param indexes The indexes of the elements to remove.
+ **/
+ public void remove(int[] indexes) {
+ Arrays.sort(indexes);
+ int sourceindex = 0;
+ int discardindex = 0;
+ for (int targetindex = 0; targetindex < size; targetindex++) {
+ if (discardindex < indexes.length && targetindex == indexes[discardindex]) {
+ // skip this one (by simply not copying it and not incrementing sourceindex), increment discardindex
+ discardindex++;
+ } else {
+ vector[sourceindex] = vector[targetindex];
+ sourceindex++;
+ }
+ }
+ if (discardindex != indexes.length)
+ // this should never happen.
+ throw new RuntimeException("There was a problem removing some of the indexes!");
+ size -= indexes.length;
+ }
+
+
/** Returns the value of {@link #size}. */
public int size() {
return size;
diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java
new file mode 100644
index 00000000..e5202d51
--- /dev/null
+++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java
@@ -0,0 +1,20 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+public class SparseNetworkLearningPruneTest {
+
+ @Test
+ public void test() {
+ }
+
+}
diff --git a/pom.xml b/pom.xml
index 19dea5ef..db0a0881 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
edu.illinois.cs.cogcomp
lbjava-project
pom
- 1.2.26
+ 1.3.1
lbjava
@@ -33,11 +33,11 @@
CogcompSoftware
CogcompSoftware
- scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo
+ scp://legolas.cs.illinois.edu:/srv/data/cogcomp/html/m2repo
CogcompSoftwareDoc
- scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/software/doc/${project.artifactId}
+ scp://legolas.cs.illinois.edu:/srv/data/cogcomp/html/software/doc/